From d5a345782628e445127352bf8c9b7d65c8c82587 Mon Sep 17 00:00:00 2001 From: Kial Date: Fri, 26 Jul 2024 08:39:11 -0700 Subject: [PATCH] SOLR/API/IMPORTER - major infrastructure update (#271) * SOLR/API/IMPORTER - major infrastructure update Signed-off-by: Kial Jinnah * cleanup Signed-off-by: Kial Jinnah * fix test setup attempt Signed-off-by: Kial Jinnah * downgrade deps to be the same as bor Signed-off-by: Kial Jinnah --------- Signed-off-by: Kial Jinnah --- .gitignore | 6 +- search-api/.env.sample | 95 +- search-api/migrations/alembic.ini | 1 + .../versions/20240723_131259_eaf25b9a20bf_.py | 42 + search-api/requirements.txt | 106 +- .../requirements/bcregistry-libraries.txt | 1 - search-api/setup.cfg | 2 +- search-api/src/search_api/__init__.py | 13 +- search-api/src/search_api/config.py | 25 +- .../src/search_api/exceptions/__init__.py | 38 +- search-api/src/search_api/models/solr_doc.py | 5 + .../src/search_api/models/solr_doc_event.py | 36 + .../search_api/request_handlers/__init__.py | 3 +- .../src/search_api/request_handlers/search.py | 172 - .../request_handlers/update_solr_handler.py | 61 +- .../src/search_api/resources/__init__.py | 5 + .../src/search_api/resources/constants.py | 1 + search-api/src/search_api/resources/utils.py | 138 +- .../businesses/documents/document_request.py | 2 +- .../resources/v1/businesses/search.py | 227 +- .../resources/v1/internal/__init__.py | 4 +- .../resources/v1/internal/solr/__init__.py | 25 + .../resources/v1/internal/solr/command.py | 52 + .../resources/v1/internal/solr/imports.py | 60 + .../v1/internal/solr/update/__init__.py | 126 + .../v1/internal/solr/update/resync.py | 67 + .../resources/v1/internal/solr/update/sync.py | 135 + .../resources/v1/internal/update_solr.py | 161 - .../src/search_api/resources/v1/meta.py | 3 +- .../src/search_api/services/__init__.py | 6 +- .../search_api/services/base_solr/__init__.py | 204 + .../base_solr/utils/__init__.py} | 14 +- .../base_solr/utils/formatting_helpers.py | 62 + 
.../services/base_solr/utils/query_builder.py | 150 + .../services/base_solr/utils/query_params.py | 34 + .../services/business_solr/__init__.py | 64 + .../business_solr/doc_fields/__init__.py | 29 + .../business_solr/doc_fields/business.py | 53 + .../doc_fields/party.py} | 41 +- .../business_solr/doc_models/__init__.py | 16 + .../doc_models/business.py} | 25 +- .../business_solr/doc_models/party.py | 32 + .../services/business_solr/utils/__init__.py | 17 + .../utils/add_category_filters.py | 27 + .../business_solr/utils/business_search.py | 61 + .../business_solr/utils/business_suggest.py | 58 + .../business_solr/utils/parties_search.py | 55 + search-api/src/search_api/services/flags.py | 32 +- .../src/search_api/services/solr/solr.py | 304 -- search-api/src/search_api/version.py | 2 +- .../search-api.postman_collection.json | 1469 ++++--- .../tests/unit/api/businesses/test_search.py | 270 -- .../tests/unit/api/internal/__init__.py | 2 +- .../unit/api/internal}/solr/__init__.py | 5 +- .../unit/api/internal/solr/test_backup.py | 91 + .../unit/api/internal/solr/test_import.py | 131 + .../unit/api/internal/solr/update/__init__.py | 30 + .../internal/solr/update/test_resync_solr.py | 178 + .../internal/solr/update/test_update_solr.py | 212 + .../unit/api/internal/test_update_solr.py | 209 - search-api/tests/unit/api/search/__init__.py | 14 + .../tests/unit/api/search/test_facets.py | 398 ++ .../tests/unit/api/search/test_parties.py | 242 ++ search-api/tests/unit/api/search/util.py | 26 + search-api/tests/unit/api/test_meta.py | 3 - search-api/tests/unit/models/test_solr_doc.py | 8 +- .../tests/unit/services/test_solr/__init__.py | 82 - .../services/test_solr/test_solr_basic.py | 123 - .../services/test_solr/test_solr_query.py | 118 - search-api/tests/unit/utils/__init__.py | 64 + search-solr-importer/.env.sample | 72 +- search-solr-importer/data_import_handler.py | 232 +- search-solr-importer/devops/vaults.json | 7 + .../src/search_solr_importer/__init__.py | 4 +- 
.../src/search_solr_importer/config.py | 43 +- .../search_solr_importer/utils/__init__.py | 6 +- .../utils/data_collection.py | 23 + .../utils/data_parsing.py | 142 +- .../src/search_solr_importer/utils/reindex.py | 124 + .../search_solr_importer/utils/update_solr.py | 139 + .../src/search_solr_importer/version.py | 2 +- search-solr/.gitignore | 2 + search-solr/Dockerfile | 39 +- search-solr/Makefile | 57 + search-solr/README.md | 51 +- search-solr/action.yml | 4 +- .../bitnami/solr/server/solr/README.md | 79 - .../_default/conf/managed-schema.xml | 1031 ----- .../conf/_rest_managed.json | 1 - .../_schema_analysis_stopwords_english.json | 38 - .../_schema_analysis_synonyms_english.json | 11 - .../conf/currency.xml | 67 - .../conf/elevate.xml | 42 - .../conf/lang/contractions_ca.txt | 8 - .../conf/lang/contractions_fr.txt | 15 - .../conf/lang/contractions_ga.txt | 5 - .../conf/lang/contractions_it.txt | 23 - .../conf/lang/hyphenations_ga.txt | 5 - .../conf/lang/stemdict_nl.txt | 6 - .../conf/lang/stoptags_ja.txt | 420 -- .../conf/lang/stopwords_ar.txt | 125 - .../conf/lang/stopwords_bg.txt | 193 - .../conf/lang/stopwords_ca.txt | 220 - .../conf/lang/stopwords_ckb.txt | 136 - .../conf/lang/stopwords_cz.txt | 172 - .../conf/lang/stopwords_da.txt | 110 - .../conf/lang/stopwords_de.txt | 294 -- .../conf/lang/stopwords_el.txt | 78 - .../conf/lang/stopwords_en.txt | 54 - .../conf/lang/stopwords_es.txt | 356 -- .../conf/lang/stopwords_et.txt | 1603 ------- .../conf/lang/stopwords_eu.txt | 99 - .../conf/lang/stopwords_fa.txt | 313 -- .../conf/lang/stopwords_fi.txt | 97 - .../conf/lang/stopwords_fr.txt | 186 - .../conf/lang/stopwords_ga.txt | 110 - .../conf/lang/stopwords_gl.txt | 161 - .../conf/lang/stopwords_hi.txt | 235 - .../conf/lang/stopwords_hu.txt | 211 - .../conf/lang/stopwords_hy.txt | 46 - .../conf/lang/stopwords_id.txt | 359 -- .../conf/lang/stopwords_it.txt | 303 -- .../conf/lang/stopwords_ja.txt | 127 - .../conf/lang/stopwords_lv.txt | 172 - 
.../conf/lang/stopwords_nl.txt | 119 - .../conf/lang/stopwords_no.txt | 194 - .../conf/lang/stopwords_pt.txt | 253 -- .../conf/lang/stopwords_ro.txt | 233 - .../conf/lang/stopwords_ru.txt | 243 -- .../conf/lang/stopwords_sv.txt | 133 - .../conf/lang/stopwords_th.txt | 119 - .../conf/lang/stopwords_tr.txt | 212 - .../conf/lang/userdict_ja.txt | 29 - .../conf/managed-schema | 1202 ------ .../conf/mapping-FoldToASCII.txt | 3813 ----------------- .../conf/mapping-ISOLatin1Accent.txt | 246 -- .../conf/params.json | 11 - .../conf/protwords.txt | 21 - .../conf/spellings.txt | 2 - .../conf/stopwords.txt | 14 - .../conf/synonyms.txt | 29 - .../conf/update-script.js | 53 - .../conf/xslt/example.xsl | 132 - .../conf/xslt/example_atom.xsl | 67 - .../conf/xslt/example_rss.xsl | 66 - .../conf/xslt/luke.xsl | 337 -- .../conf/xslt/updateXml.xsl | 74 - .../solr/search/conf/lang/contractions_ca.txt | 8 - .../solr/search/conf/lang/contractions_fr.txt | 15 - .../solr/search/conf/lang/contractions_ga.txt | 5 - .../solr/search/conf/lang/contractions_it.txt | 23 - .../solr/search/conf/lang/hyphenations_ga.txt | 5 - .../solr/search/conf/lang/stemdict_nl.txt | 6 - .../solr/search/conf/lang/stoptags_ja.txt | 420 -- .../solr/search/conf/lang/stopwords_ar.txt | 125 - .../solr/search/conf/lang/stopwords_bg.txt | 193 - .../solr/search/conf/lang/stopwords_ca.txt | 220 - .../solr/search/conf/lang/stopwords_cz.txt | 172 - .../solr/search/conf/lang/stopwords_da.txt | 110 - .../solr/search/conf/lang/stopwords_de.txt | 294 -- .../solr/search/conf/lang/stopwords_el.txt | 78 - .../solr/search/conf/lang/stopwords_en.txt | 54 - .../solr/search/conf/lang/stopwords_es.txt | 356 -- .../solr/search/conf/lang/stopwords_et.txt | 1603 ------- .../solr/search/conf/lang/stopwords_eu.txt | 99 - .../solr/search/conf/lang/stopwords_fa.txt | 313 -- .../solr/search/conf/lang/stopwords_fi.txt | 97 - .../solr/search/conf/lang/stopwords_fr.txt | 186 - .../solr/search/conf/lang/stopwords_ga.txt | 110 - 
.../solr/search/conf/lang/stopwords_gl.txt | 161 - .../solr/search/conf/lang/stopwords_hi.txt | 235 - .../solr/search/conf/lang/stopwords_hu.txt | 211 - .../solr/search/conf/lang/stopwords_hy.txt | 46 - .../solr/search/conf/lang/stopwords_id.txt | 359 -- .../solr/search/conf/lang/stopwords_it.txt | 303 -- .../solr/search/conf/lang/stopwords_ja.txt | 127 - .../solr/search/conf/lang/stopwords_lv.txt | 172 - .../solr/search/conf/lang/stopwords_nl.txt | 119 - .../solr/search/conf/lang/stopwords_no.txt | 194 - .../solr/search/conf/lang/stopwords_pt.txt | 253 -- .../solr/search/conf/lang/stopwords_ro.txt | 233 - .../solr/search/conf/lang/stopwords_ru.txt | 243 -- .../solr/search/conf/lang/stopwords_sv.txt | 133 - .../solr/search/conf/lang/stopwords_th.txt | 119 - .../solr/search/conf/lang/stopwords_tr.txt | 212 - .../solr/search/conf/lang/userdict_ja.txt | 29 - .../server/solr/search/conf/protwords.txt | 21 - .../server/solr/search/conf/stopwords.txt | 14 - .../solr/server/solr/search/conf/synonyms.txt | 29 - search-solr/bitnami/solr/server/solr/zoo.cfg | 37 - .../business}/conf/lang/contractions_ca.txt | 0 .../business}/conf/lang/contractions_fr.txt | 0 .../business}/conf/lang/contractions_ga.txt | 0 .../business}/conf/lang/contractions_it.txt | 0 .../business}/conf/lang/hyphenations_ga.txt | 0 .../business}/conf/lang/stemdict_nl.txt | 0 .../business}/conf/lang/stoptags_ja.txt | 0 .../business}/conf/lang/stopwords_ar.txt | 0 .../business}/conf/lang/stopwords_bg.txt | 0 .../business}/conf/lang/stopwords_ca.txt | 0 .../business}/conf/lang/stopwords_cz.txt | 0 .../business}/conf/lang/stopwords_da.txt | 0 .../business}/conf/lang/stopwords_de.txt | 0 .../business}/conf/lang/stopwords_el.txt | 0 .../business}/conf/lang/stopwords_en.txt | 0 .../business}/conf/lang/stopwords_es.txt | 0 .../business}/conf/lang/stopwords_et.txt | 0 .../business}/conf/lang/stopwords_eu.txt | 0 .../business}/conf/lang/stopwords_fa.txt | 0 .../business}/conf/lang/stopwords_fi.txt | 0 
.../business}/conf/lang/stopwords_fr.txt | 0 .../business}/conf/lang/stopwords_ga.txt | 0 .../business}/conf/lang/stopwords_gl.txt | 0 .../business}/conf/lang/stopwords_hi.txt | 0 .../business}/conf/lang/stopwords_hu.txt | 0 .../business}/conf/lang/stopwords_hy.txt | 0 .../business}/conf/lang/stopwords_id.txt | 0 .../business}/conf/lang/stopwords_it.txt | 0 .../business}/conf/lang/stopwords_ja.txt | 0 .../business}/conf/lang/stopwords_lv.txt | 0 .../business}/conf/lang/stopwords_nl.txt | 0 .../business}/conf/lang/stopwords_no.txt | 0 .../business}/conf/lang/stopwords_pt.txt | 0 .../business}/conf/lang/stopwords_ro.txt | 0 .../business}/conf/lang/stopwords_ru.txt | 0 .../business}/conf/lang/stopwords_sv.txt | 0 .../business}/conf/lang/stopwords_th.txt | 0 .../business}/conf/lang/stopwords_tr.txt | 0 .../business}/conf/lang/userdict_ja.txt | 0 .../business}/conf/managed-schema.xml | 111 +- .../business}/conf/protwords.txt | 0 .../business}/conf/solrconfig.xml | 806 ++-- .../business}/conf/stopwords.txt | 0 .../business}/conf/synonyms.txt | 0 .../unused-language-fieldtypes.xml | 0 .../search => solr/business}/core.properties | 2 +- search-solr/solr/business/solr.in.sh | 287 ++ .../business_follower}/conf/solrconfig.xml | 102 +- .../solr/business_follower/core.properties | 4 + .../{bitnami/solr/server => }/solr/solr.xml | 10 +- 240 files changed, 5513 insertions(+), 26184 deletions(-) create mode 100644 search-api/migrations/versions/20240723_131259_eaf25b9a20bf_.py delete mode 100644 search-api/src/search_api/request_handlers/search.py create mode 100644 search-api/src/search_api/resources/v1/internal/solr/__init__.py create mode 100644 search-api/src/search_api/resources/v1/internal/solr/command.py create mode 100644 search-api/src/search_api/resources/v1/internal/solr/imports.py create mode 100644 search-api/src/search_api/resources/v1/internal/solr/update/__init__.py create mode 100644 search-api/src/search_api/resources/v1/internal/solr/update/resync.py create 
mode 100644 search-api/src/search_api/resources/v1/internal/solr/update/sync.py delete mode 100644 search-api/src/search_api/resources/v1/internal/update_solr.py create mode 100644 search-api/src/search_api/services/base_solr/__init__.py rename search-api/src/search_api/{schemas.py => services/base_solr/utils/__init__.py} (62%) create mode 100644 search-api/src/search_api/services/base_solr/utils/formatting_helpers.py create mode 100644 search-api/src/search_api/services/base_solr/utils/query_builder.py create mode 100644 search-api/src/search_api/services/base_solr/utils/query_params.py create mode 100644 search-api/src/search_api/services/business_solr/__init__.py create mode 100644 search-api/src/search_api/services/business_solr/doc_fields/__init__.py create mode 100644 search-api/src/search_api/services/business_solr/doc_fields/business.py rename search-api/src/search_api/services/{solr/solr_fields.py => business_solr/doc_fields/party.py} (66%) create mode 100644 search-api/src/search_api/services/business_solr/doc_models/__init__.py rename search-api/src/search_api/services/{solr/solr_docs.py => business_solr/doc_models/business.py} (60%) create mode 100644 search-api/src/search_api/services/business_solr/doc_models/party.py create mode 100644 search-api/src/search_api/services/business_solr/utils/__init__.py create mode 100644 search-api/src/search_api/services/business_solr/utils/add_category_filters.py create mode 100644 search-api/src/search_api/services/business_solr/utils/business_search.py create mode 100644 search-api/src/search_api/services/business_solr/utils/business_suggest.py create mode 100644 search-api/src/search_api/services/business_solr/utils/parties_search.py delete mode 100644 search-api/src/search_api/services/solr/solr.py delete mode 100644 search-api/tests/unit/api/businesses/test_search.py rename search-api/{src/search_api/services => tests/unit/api/internal}/solr/__init__.py (78%) create mode 100644 
search-api/tests/unit/api/internal/solr/test_backup.py create mode 100644 search-api/tests/unit/api/internal/solr/test_import.py create mode 100644 search-api/tests/unit/api/internal/solr/update/__init__.py create mode 100644 search-api/tests/unit/api/internal/solr/update/test_resync_solr.py create mode 100644 search-api/tests/unit/api/internal/solr/update/test_update_solr.py delete mode 100644 search-api/tests/unit/api/internal/test_update_solr.py create mode 100644 search-api/tests/unit/api/search/__init__.py create mode 100644 search-api/tests/unit/api/search/test_facets.py create mode 100644 search-api/tests/unit/api/search/test_parties.py create mode 100644 search-api/tests/unit/api/search/util.py delete mode 100644 search-api/tests/unit/services/test_solr/__init__.py delete mode 100644 search-api/tests/unit/services/test_solr/test_solr_basic.py delete mode 100644 search-api/tests/unit/services/test_solr/test_solr_query.py create mode 100644 search-solr-importer/src/search_solr_importer/utils/reindex.py create mode 100644 search-solr-importer/src/search_solr_importer/utils/update_solr.py create mode 100644 search-solr/.gitignore create mode 100644 search-solr/Makefile delete mode 100644 search-solr/bitnami/solr/server/solr/README.md delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/_default/conf/managed-schema.xml delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/_rest_managed.json delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/_schema_analysis_stopwords_english.json delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/_schema_analysis_synonyms_english.json delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/currency.xml delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/elevate.xml delete mode 100644 
search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_ca.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_fr.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_ga.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_it.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/hyphenations_ga.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stemdict_nl.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stoptags_ja.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ar.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_bg.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ca.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ckb.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_cz.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_da.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_de.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_el.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_en.txt delete mode 100644 
search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_es.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_et.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_eu.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_fa.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_fi.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_fr.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ga.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_gl.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_hi.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_hu.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_hy.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_id.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_it.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ja.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_lv.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_nl.txt delete mode 100644 
search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_no.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_pt.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ro.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ru.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_sv.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_th.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_tr.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/userdict_ja.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/mapping-FoldToASCII.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/mapping-ISOLatin1Accent.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/params.json delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/protwords.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/spellings.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/stopwords.txt delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/synonyms.txt delete mode 100644 
search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/update-script.js delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/example.xsl delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/example_atom.xsl delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/example_rss.xsl delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/luke.xsl delete mode 100644 search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/updateXml.xsl delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_ca.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_fr.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_ga.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_it.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/hyphenations_ga.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stemdict_nl.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stoptags_ja.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ar.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_bg.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ca.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_cz.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_da.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_de.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_el.txt delete mode 100644 
search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_en.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_es.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_et.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_eu.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_fa.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_fi.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_fr.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ga.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_gl.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_hi.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_hu.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_hy.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_id.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_it.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ja.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_lv.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_nl.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_no.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_pt.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ro.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ru.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_sv.txt delete mode 100644 
search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_th.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_tr.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/lang/userdict_ja.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/protwords.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/stopwords.txt delete mode 100644 search-solr/bitnami/solr/server/solr/search/conf/synonyms.txt delete mode 100644 search-solr/bitnami/solr/server/solr/zoo.cfg rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/contractions_ca.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/contractions_fr.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/contractions_ga.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/contractions_it.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/hyphenations_ga.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stemdict_nl.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stoptags_ja.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_ar.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_bg.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_ca.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_cz.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_da.txt (100%) rename 
search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_de.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_el.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_en.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_es.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_et.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_eu.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_fa.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_fi.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_fr.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_ga.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_gl.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_hi.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_hu.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_hy.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_id.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_it.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_ja.txt (100%) rename 
search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_lv.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_nl.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_no.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_pt.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_ro.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_ru.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_sv.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_th.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/stopwords_tr.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/lang/userdict_ja.txt (100%) rename search-solr/{bitnami/solr/server/solr/search => solr/business}/conf/managed-schema.xml (83%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/protwords.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/sample_techproducts_configs => solr/business}/conf/solrconfig.xml (64%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/stopwords.txt (100%) rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business}/conf/synonyms.txt (100%) rename search-solr/{bitnami/solr/server/solr/search => solr/business}/conf/unused-field-types/unused-language-fieldtypes.xml (100%) rename search-solr/{bitnami/solr/server/solr/search => solr/business}/core.properties (84%) create mode 100644 search-solr/solr/business/solr.in.sh 
rename search-solr/{bitnami/solr/server/solr/configsets/_default => solr/business_follower}/conf/solrconfig.xml (93%) create mode 100644 search-solr/solr/business_follower/core.properties rename search-solr/{bitnami/solr/server => }/solr/solr.xml (86%) diff --git a/.gitignore b/.gitignore index 6f064418..7922ac8f 100644 --- a/.gitignore +++ b/.gitignore @@ -131,4 +131,8 @@ search-solr/test_solr .DS_Store -.history \ No newline at end of file +.history + +__pycache__ + +.python-version diff --git a/search-api/.env.sample b/search-api/.env.sample index 731f8cd5..d737f349 100644 --- a/search-api/.env.sample +++ b/search-api/.env.sample @@ -5,69 +5,68 @@ FLASK_APP=wsgi.py # Namespace POD_NAMESPACE=local -# third party Services -LD_SDK_KEY= +# Timeouts +AUTH_API_TIMEOUT=20 +PAY_API_TIMEOUT=20 +BUSINESS_API_TIMEOUT=30 + +# Sentry SENTRY_DSN= SENTRY_TSR=1.0 -# Registry Integration Services -AUTH_SVC_URL= -PAYMENT_SVC_URL= -LEGAL_API_URL= -LEGAL_API_VERSION_2=/api/v2 - -# SQL Alchemy -DATABASE_USERNAME= -DATABASE_PASSWORD= -DATABASE_NAME= -DATABASE_HOST= -DATABASE_PORT= - -# SOLR URL -SOLR_SVC_URL= - -# TEST SOLR -SOLR_SVC_TEST_URL= +# LDarkly +LD_SDK_KEY= +OPS_LOGGER_LEVEL=ops-logger-level -# Flag Names -FF_QUEUE_DOC_REQUEST_NAME= -OPS_LOGGER_LEVEL= -PUBSUB_EMULATOR_HOST=fake +# JWT Settings +JWT_OIDC_JWKS_CACHE_TIMEOUT=300 +JWT_OIDC_ALGORITHMS=RS256 +JWT_OIDC_AUDIENCE=business-search-service +JWT_OIDC_CACHING_ENABLED=True -# Queue -QUEUE_PROJECT_ID= +# Test values +SOLR_SVC_BUS_LEADER_TEST_URL=http://localhost:8873/solr +SOLR_SVC_BUS_FOLLOWER_TEST_URL=http://localhost:8873/solr -# TEST DB DATABASE_TEST_USERNAME= DATABASE_TEST_PASSWORD= DATABASE_TEST_NAME= DATABASE_TEST_HOST=localhost DATABASE_TEST_PORT=5432 -# Service Account -ACCOUNT_SVC_AUTH_URL= -ACCOUNT_SVC_CLIENT_ID= -ACCOUNT_SVC_CLIENT_SECRET= - -# JWT Settings -JWT_OIDC_WELL_KNOWN_CONFIG= -JWT_OIDC_ALGORITHMS=RS256 -JWT_OIDC_AUDIENCE= -JWT_OIDC_CLIENT_SECRET= -JWT_OIDC_CACHING_ENABLED=True 
-JWT_OIDC_JWKS_CACHE_TIMEOUT=300 - -# SOLR REINDEX INFO -# i.e. 02, 09, 21 -SOLR_REINDEX_DAY= -SOLR_REINDEX_WEEKDAY=1 # 0-6 (0 = Monday with python3.8) -SOLR_REINDEX_START_TIME=07:05:00+0000 # HH:MM:SS+HHMM -SOLR_REINDEX_LENGTH=15 # minutes - -# INTEGRATION TESTS RUN_AUTHORIZATION_TESTS=True RUN_LD_TESTS=True RUN_NATS_TESTS=True RUN_PAYMENT_TESTS=True RUN_SOLR_TESTS=True NOT_GITHUB_CI=True + +## Local + +# SQL Alchemy +DATABASE_USERNAME= +DATABASE_PASSWORD= +DATABASE_NAME= +DATABASE_HOST=localhost +DATABASE_PORT=5432 + +# Solr +SOLR_SVC_BUS_LEADER_URL=http://localhost:8873/solr +SOLR_SVC_BUS_FOLLOWER_URL=http://localhost:8873/solr +SOLR_SVC_BUS_LEADER_CORE=business +SOLR_SVC_BUS_FOLLOWER_CORE=business + +## DEV +AUTH_SVC_URL=https://auth-api-dev.apps.silver.devops.gov.bc.ca/api/v1 +PAYMENT_SVC_URL=https://pay-api-dev.apps.silver.devops.gov.bc.ca/api/v1/payment-requests +LEGAL_API_URL=https://legal-api-dev.apps.silver.devops.gov.bc.ca +LEGAL_API_VERSION_2=/api/v2 + +JWT_OIDC_CLIENT_SECRET= +JWT_OIDC_ISSUER= +JWT_TOKEN_URL= +JWT_OIDC_WELL_KNOWN_CONFIG= + +ACCOUNT_SVC_AUTH_URL= +ACCOUNT_SVC_CLIENT_ID= +ACCOUNT_SVC_CLIENT_SECRET= diff --git a/search-api/migrations/alembic.ini b/search-api/migrations/alembic.ini index ec9d45c2..edaa1161 100644 --- a/search-api/migrations/alembic.ini +++ b/search-api/migrations/alembic.ini @@ -3,6 +3,7 @@ [alembic] # template used to generate migration files # file_template = %%(rev)s_%%(slug)s +file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d%%(second).2d_%%(rev)s_%%(slug)s # set to 'true' to run the environment during # the 'revision' command, regardless of autogenerate diff --git a/search-api/migrations/versions/20240723_131259_eaf25b9a20bf_.py b/search-api/migrations/versions/20240723_131259_eaf25b9a20bf_.py new file mode 100644 index 00000000..4e170ac8 --- /dev/null +++ b/search-api/migrations/versions/20240723_131259_eaf25b9a20bf_.py @@ -0,0 +1,42 @@ +"""empty message + +Revision ID: eaf25b9a20bf +Revises: 
c10f3d4b2262 +Create Date: 2024-07-23 13:12:59.736684 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'eaf25b9a20bf' +down_revision = 'c10f3d4b2262' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('solr_doc_events', schema=None) as batch_op: + batch_op.add_column(sa.Column('event_last_update', sa.DateTime(timezone=True), nullable=True)) + + with op.batch_alter_table('users', schema=None) as batch_op: + batch_op.drop_index('ix_user_idp_userid') + batch_op.drop_constraint('users_idp_userid_key', type_='unique') + batch_op.create_index(batch_op.f('ix_users_idp_userid'), ['idp_userid'], unique=True) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('users', schema=None) as batch_op: + batch_op.drop_index(batch_op.f('ix_users_idp_userid')) + batch_op.create_unique_constraint('users_idp_userid_key', ['idp_userid']) + batch_op.create_index('ix_user_idp_userid', ['idp_userid'], unique=True) + + with op.batch_alter_table('solr_doc_events', schema=None) as batch_op: + batch_op.drop_column('event_last_update') + + # ### end Alembic commands ### diff --git a/search-api/requirements.txt b/search-api/requirements.txt index 475b3d95..45ab19d4 100644 --- a/search-api/requirements.txt +++ b/search-api/requirements.txt @@ -1,68 +1,66 @@ -Babel==2.12.1 -Flask-Cors==3.0.10 +Babel==2.15.0 +Flask-Cors==4.0.0 Flask-Migrate==4.0.4 -Flask-Moment==1.0.5 -Flask-SQLAlchemy==3.0.3 -Flask==2.2.3 -Jinja2==3.1.2 -Mako==1.2.4 -MarkupSafe==2.1.2 -SQLAlchemy==2.0.6 -Werkzeug==2.2.3 -alembic==1.10.2 -attrs==22.2.0 -blinker==1.5 -cachelib==0.10.2 -cachetools==5.3.0 -certifi==2022.12.7 -charset-normalizer==3.1.0 -click==8.1.3 +Flask-Moment==1.0.6 +Flask-SQLAlchemy==3.0.5 +Flask==2.3.2 +Jinja2==3.1.4 +Mako==1.3.5 +MarkupSafe==2.1.5 
+SQLAlchemy==2.0.19 +Werkzeug==2.3.6 +alembic==1.13.2 +attrs==23.2.0 +blinker==1.8.2 +cachelib==0.13.0 +cachetools==5.4.0 +certifi==2024.7.4 +charset-normalizer==3.3.2 +click==8.1.7 datedelta==1.4 -dpath==2.1.4 -ecdsa==0.18.0 +dpath==2.2.0 +ecdsa==0.19.0 expiringdict==1.2.2 -flask-babel==3.0.1 +flask-babel==4.0.0 flask-jwt-oidc==0.3.0 -google-api-core==2.11.0 -google-auth-oauthlib==1.0.0 -google-auth==2.16.2 -google-cloud-core==2.3.2 -google-cloud-pubsub==2.15.0 -google-cloud-storage==2.7.0 +google-api-core==2.19.1 +google-auth-oauthlib==1.2.1 +google-auth==2.32.0 +google-cloud-core==2.4.1 +google-cloud-pubsub==2.22.0 +google-cloud-storage==2.17.0 google-crc32c==1.5.0 -google-resumable-media==2.4.1 -googleapis-common-protos==1.58.0 -greenlet==2.0.2 -grpc-google-iam-v1==0.12.6 -grpcio-status==1.51.3 -grpcio==1.51.3 -gunicorn==20.1.0 -idna==3.4 -itsdangerous==2.1.2 +google-resumable-media==2.7.1 +googleapis-common-protos==1.63.2 +grpc-google-iam-v1==0.13.1 +grpcio-status==1.65.1 +grpcio==1.65.1 +gunicorn==22.0.0 +idna==3.7 +itsdangerous==2.2.0 jsonschema==3.2.0 -launchdarkly-server-sdk==8.1.1 +launchdarkly-server-sdk==8.1.4 oauthlib==3.2.2 -packaging==23.0 -proto-plus==1.22.2 -protobuf==4.22.1 -psycopg2-binary==2.9.5 +packaging==24.1 +proto-plus==1.24.0 +protobuf==5.27.2 +psycopg2-binary==2.9.9 pyRFC3339==1.1 -pyasn1-modules==0.2.8 -pyasn1==0.4.8 -pyrsistent==0.19.3 -python-dateutil==2.8.2 -python-dotenv==1.0.0 +pyasn1==0.6.0 +pyasn1_modules==0.4.0 +pyrsistent==0.20.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 python-jose==3.3.0 -pytz==2022.7.1 -requests-oauthlib==1.3.1 -requests==2.28.2 +pytz==2024.1 +requests-oauthlib==2.0.0 +requests==2.31.0 rsa==4.9 semver==2.13.0 -sentry-sdk==1.20.0 +sentry-sdk==2.10.0 six==1.16.0 strict-rfc3339==0.7 -typing_extensions==4.5.0 -urllib3==1.26.15 -git+https://github.com/bcgov/registry-schemas.git@1.5.3#egg=registry_schemas +typing_extensions==4.12.2 +urllib3==1.26.16 
git+https://github.com/daxiom/simple-cloudevent.py.git@0.0.2 -git+https://github.com/daxiom/flask-pub.git@0.0.4 +git+https://github.com/daxiom/flask-pub.git@0.0.4 \ No newline at end of file diff --git a/search-api/requirements/bcregistry-libraries.txt b/search-api/requirements/bcregistry-libraries.txt index 60745a73..e69de29b 100644 --- a/search-api/requirements/bcregistry-libraries.txt +++ b/search-api/requirements/bcregistry-libraries.txt @@ -1 +0,0 @@ -git+https://github.com/bcgov/registry-schemas.git@1.5.3#egg=registry_schemas diff --git a/search-api/setup.cfg b/search-api/setup.cfg index de7aa316..5523eb7b 100644 --- a/search-api/setup.cfg +++ b/search-api/setup.cfg @@ -41,7 +41,7 @@ application_import_names=search_api per-file-ignores = *.py:I001 */__init__.py:F401 - */solr_docs.py:N815 + */doc_models/*.py:N815,F401 [pycodestyle] max_line_length = 120 diff --git a/search-api/src/search_api/__init__.py b/search-api/src/search_api/__init__.py index ca457351..f4a50ba9 100644 --- a/search-api/src/search_api/__init__.py +++ b/search-api/src/search_api/__init__.py @@ -23,17 +23,14 @@ import sentry_sdk # noqa: I001; pylint: disable=ungrouped-imports; conflicts with Flake8 from sentry_sdk.integrations.flask import FlaskIntegration # noqa: I001 -from flask import redirect, url_for, Flask # noqa: I001 -from flask_migrate import Migrate -from registry_schemas import __version__ as registry_schemas_version -from registry_schemas.flask import SchemaServices # noqa: I001 +from flask import Flask, redirect # noqa: I001 +from flask_migrate import Migrate # noqa: I001 from search_api import errorhandlers, models from search_api.config import config from search_api.models import db from search_api.resources import v1_endpoint -from search_api.schemas import rsbc_schemas -from search_api.services import Flags, queue, search_solr +from search_api.services import Flags, business_solr, queue from search_api.translations import babel from search_api.utils.auth import jwt from 
search_api.utils.logging import set_log_level_by_flag, setup_logging @@ -65,9 +62,8 @@ def create_app(config_name: str = os.getenv('APP_ENV') or 'production', **kwargs errorhandlers.init_app(app) db.init_app(app) - rsbc_schemas.init_app(app) queue.init_app(app) - search_solr.init_app(app) + business_solr.init_app(app) babel.init_app(app) migrate.init_app(app, db) @@ -88,7 +84,6 @@ def be_nice_swagger_redirect(): # pylint: disable=unused-variable def add_version(response): # pylint: disable=unused-variable version = get_run_version() response.headers['API'] = f'search_api/{version}' - response.headers['SCHEMAS'] = f'registry_schemas/{registry_schemas_version}' return response register_shellcontext(app) diff --git a/search-api/src/search_api/config.py b/search-api/src/search_api/config.py index 903cceb6..6a72ecac 100644 --- a/search-api/src/search_api/config.py +++ b/search-api/src/search_api/config.py @@ -35,9 +35,18 @@ class Config(): # pylint: disable=too-few-public-methods PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__)) + # Used by /sync endpoint + MAX_BATCH_UPDATE_NUM = int(os.getenv('MAX_BATCH_UPDATE_NUM', '1000')) + # Used by /sync heartbeat + LAST_REPLICATION_THRESHOLD = int(os.getenv('LAST_REPLICATION_THRESHOLD', '24')) # hours + + SOLR_SVC_BUS_LEADER_CORE = os.getenv('SOLR_SVC_BUS_LEADER_CORE', 'business') + SOLR_SVC_BUS_FOLLOWER_CORE = os.getenv('SOLR_SVC_BUS_FOLLOWER_CORE', 'business_follower') + SOLR_SVC_BUS_LEADER_URL = os.getenv('SOLR_SVC_BUS_LEADER_URL', 'http://localhost:8873/solr') + SOLR_SVC_BUS_FOLLOWER_URL = os.getenv('SOLR_SVC_BUS_FOLLOWER_URL', 'http://localhost:8873/solr') + PAYMENT_SVC_URL = os.getenv('PAYMENT_SVC_URL', 'http://') AUTH_SVC_URL = os.getenv('AUTH_SVC_URL', 'http://') - SOLR_SVC_URL = os.getenv('SOLR_SVC_URL', 'http://') LEAR_SVC_URL = os.getenv('LEGAL_API_URL', 'http://') + os.getenv('LEGAL_API_VERSION_2', '/api/v2') # Flask-Pub @@ -138,15 +147,6 @@ class Config(): # pylint: disable=too-few-public-methods GATEWAY_URL 
= os.getenv('GATEWAY_URL', 'https://bcregistry-dev.apigee.net') SUBSCRIPTION_API_KEY = os.getenv('SUBSCRIPTION_API_KEY') - # reindex times - SOLR_REINDEX_DAY = os.getenv('SOLR_REINDEX_DAY', None) - if os.getenv('SOLR_REINDEX_WEEKDAY'): - SOLR_REINDEX_WEEKDAY = int(os.getenv('SOLR_REINDEX_WEEKDAY')) - else: - SOLR_REINDEX_WEEKDAY = None - SOLR_REINDEX_START_TIME = os.getenv('SOLR_REINDEX_START_TIME', '') - SOLR_REINDEX_LENGTH = int(os.getenv('SOLR_REINDEX_LENGTH')) if os.getenv('SOLR_REINDEX_LENGTH', None) else 0 - class DevelopmentConfig(Config): # pylint: disable=too-few-public-methods """Config object for development environment.""" @@ -163,7 +163,10 @@ class UnitTestingConfig(Config): # pylint: disable=too-few-public-methods DEVELOPMENT = False TESTING = True # SOLR - SOLR_SVC_URL = os.getenv('SOLR_SVC_TEST_URL', 'http://') + SOLR_SVC_BUS_LEADER_CORE = os.getenv('SOLR_SVC_BUS_LEADER_TEST_CORE', 'business') + SOLR_SVC_BUS_FOLLOWER_CORE = os.getenv('SOLR_SVC_BUS_FOLLOWER_TEST_CORE', 'business') + SOLR_SVC_BUS_LEADER_URL = os.getenv('SOLR_SVC_BUS_LEADER_TEST_URL', 'http://localhost:8980/solr') + SOLR_SVC_BUS_FOLLOWER_URL = os.getenv('SOLR_SVC_BUS_FOLLOWER_TEST_URL', 'http://localhost:8980/solr') # POSTGRESQL DB_USER = os.getenv('DATABASE_TEST_USERNAME', '') DB_PASSWORD = os.getenv('DATABASE_TEST_PASSWORD', '') diff --git a/search-api/src/search_api/exceptions/__init__.py b/search-api/src/search_api/exceptions/__init__.py index 4275e8e7..6cb42bcb 100644 --- a/search-api/src/search_api/exceptions/__init__.py +++ b/search-api/src/search_api/exceptions/__init__.py @@ -19,10 +19,9 @@ error - a description of the error {code / description: classname / full text} status_code - where possible use HTTP Error Codes """ -import functools +from dataclasses import dataclass from enum import Enum from http import HTTPStatus -from typing import Dict, List class ResourceErrorCodes(str, Enum): @@ -43,26 +42,33 @@ class ResourceErrorCodes(str, Enum): STORAGE_ERR = '013' -class 
BusinessException(Exception): - """Exception that adds error code and error name, that can be used for i18n support.""" +@dataclass +class BaseExceptionE(Exception): + """Base exception class for custom exceptions.""" + + error: str + message: str = None + status_code: HTTPStatus = None - def __init__(self, error: str, status_code: HTTPStatus, *args, **kwargs): - """Return a valid BusinessException.""" - super(BusinessException, self).__init__(*args, **kwargs) # pylint: disable=super-with-arguments - self.error = error - self.status_code = status_code +@dataclass +class SolrException(BaseExceptionE): + """Solr search/update/delete exception.""" -class DatabaseException(Exception): - """Database insert/update exception.""" + def __post_init__(self): + """Return a valid SolrException.""" + if self.status_code != HTTPStatus.SERVICE_UNAVAILABLE: + self.error += f', {self.status_code}' + self.status_code = HTTPStatus.INTERNAL_SERVER_ERROR + self.message = 'Solr service error while processing request.' 
-class SolrException(Exception): - """Solr search/update/delete exception.""" +class BusinessException(Exception): + """Exception that adds error code and error name, that can be used for i18n support.""" def __init__(self, error: str, status_code: HTTPStatus, *args, **kwargs): - """Return a valid SolrException.""" - super(SolrException, self).__init__(*args, **kwargs) # pylint: disable=super-with-arguments + """Return a valid BusinessException.""" + super(BusinessException, self).__init__(*args, **kwargs) # pylint: disable=super-with-arguments self.error = error self.status_code = status_code @@ -70,7 +76,7 @@ def __init__(self, error: str, status_code: HTTPStatus, *args, **kwargs): class ApiConnectionException(Exception): """Api Connection exception.""" - def __init__(self, code: int, detail: List[Dict]): + def __init__(self, code: int, detail: list[dict]): """Initialize the error object.""" super(ApiConnectionException, self).__init__() # pylint: disable=super-with-arguments self.code = code diff --git a/search-api/src/search_api/models/solr_doc.py b/search-api/src/search_api/models/solr_doc.py index cb3199f3..163906a7 100644 --- a/search-api/src/search_api/models/solr_doc.py +++ b/search-api/src/search_api/models/solr_doc.py @@ -43,6 +43,11 @@ def find_most_recent_by_identifier(cls, identifier: str) -> SolrDoc: """Return most recently submitted SolrDoc by identifier.""" return cls.query.filter_by(identifier=identifier).order_by(cls.submission_date.desc()).first() + @classmethod + def get_by_id(cls, doc_id: int) -> SolrDoc: + """Return the solr doc by its ID.""" + return cls.query.filter_by(id=doc_id).one_or_none() + @staticmethod def get_updated_identifiers_after_date(date: datetime) -> List[str]: """Return all identifiers with a submitted SolrDoc after the date.""" diff --git a/search-api/src/search_api/models/solr_doc_event.py b/search-api/src/search_api/models/solr_doc_event.py index 36ad26d8..906f165b 100644 --- 
a/search-api/src/search_api/models/solr_doc_event.py +++ b/search-api/src/search_api/models/solr_doc_event.py @@ -16,6 +16,8 @@ from datetime import datetime +from sqlalchemy import event + from search_api.enums import SolrDocEventStatus, SolrDocEventType from .db import db @@ -28,6 +30,7 @@ class SolrDocEvent(db.Model): # pylint: disable=too-few-public-methods id = db.Column(db.Integer, primary_key=True) event_date = db.Column(db.DateTime(timezone=True), default=datetime.utcnow) + event_last_update = db.Column(db.DateTime(timezone=True), default=datetime.utcnow) event_status = db.Column(db.Enum(SolrDocEventStatus), default=SolrDocEventStatus.PENDING) event_type = db.Column(db.Enum(SolrDocEventType), nullable=False) @@ -38,3 +41,36 @@ def save(self) -> SolrDocEvent: db.session.add(self) db.session.commit() return self + + @classmethod + def get_events_by_status(cls, + statuses: list[SolrDocEventStatus], + event_type: SolrDocEventType = None, + start_date: datetime = None, + limit: int = None) -> list[SolrDocEvent]: + """Update the status of the given events.""" + query = cls.query.filter(cls.event_status.in_(statuses)) + if event_type: + query = query.filter(cls.event_type == event_type) + if start_date: + query = query.filter(cls.event_date > start_date) + + query = query.order_by(cls.event_date) + if limit: + query = query.limit(limit) + + return query.all() + + @classmethod + def update_events_status(cls, status: SolrDocEventStatus, events: list[SolrDocEvent]): + """Update the status of the given events.""" + for doc_event in events: + doc_event.event_status = status + db.session.add(doc_event) + db.session.commit() + + +@event.listens_for(SolrDocEvent, 'before_update') +def receive_before_change(mapper, connection, target: SolrDocEvent): # pylint: disable=unused-argument + """Set the last updated value.""" + target.event_last_update = datetime.utcnow() diff --git a/search-api/src/search_api/request_handlers/__init__.py 
b/search-api/src/search_api/request_handlers/__init__.py index 44d8150a..d8654b70 100644 --- a/search-api/src/search_api/request_handlers/__init__.py +++ b/search-api/src/search_api/request_handlers/__init__.py @@ -12,5 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. """Exports request handler functions.""" -from .search import business_search, business_suggest, parties_search -from .update_solr_handler import update_search_solr +from .update_solr_handler import resync_business_solr, update_business_solr diff --git a/search-api/src/search_api/request_handlers/search.py b/search-api/src/search_api/request_handlers/search.py deleted file mode 100644 index fe8ef903..00000000 --- a/search-api/src/search_api/request_handlers/search.py +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright © 2022 Province of British Columbia -# -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""API request handlers for Search.""" -from typing import Dict, List - -from search_api.services import search_solr -from search_api.services.solr import Solr -from search_api.services.solr.solr_fields import SolrField - - -class SearchParams: # pylint: disable=too-few-public-methods - """Class definition of search params.""" - - def __init__(self, # pylint: disable=too-many-arguments - query: Dict[str, str], - start: int, - rows: int, - legal_types: List[str] = None, - states: List[str] = None, - party_roles: List[str] = None): - """Init instance.""" - self.query = query - self.start = start - self.rows = rows - self.legal_types = legal_types - self.states = states - self.party_roles = party_roles - - -def business_search(params: SearchParams): - """Return the list of businesses from Solr that match the query.""" - # build base query - solr_query_params = Solr.build_split_query( - params.query, - [ - SolrField.NAME_Q.value, - SolrField.NAME_STEM_AGRO.value, - SolrField.IDENTIFIER_Q.value, - SolrField.BN_Q.value - ], - [SolrField.NAME_Q.value] - ) - - # facets - solr_query_params['facet'] = 'on' - solr_query_params['json.facet'] = search_solr.base_facets - # filter queries - filter_q = '' - if params.legal_types: - filter_q = '(' + Solr.build_filter_query(SolrField.TYPE.value, [x.upper() for x in params.legal_types]) - if params.states: - filter_str = Solr.build_filter_query(SolrField.STATE.value, [x.upper() for x in params.states]) - filter_q = filter_q + ' AND ' + filter_str if filter_q else '(' + filter_str - - filter_q = filter_q + ')' if filter_q else '' - if filter_q: - if solr_query_params.get('fq'): - solr_query_params['fq'] += ' AND ' + filter_q - else: - solr_query_params['fq'] = filter_q - # boosts for result ordering - solr_query_params['defType'] = 'edismax' - solr_query_params['bq'] = f'{SolrField.NAME_Q.value}:("{params.query["value"]}"~10)^30.0' + \ - f' AND {SolrField.NAME_STEM_AGRO.value}:("{params.query["value"]}"~10)^20.0' + \ - f' AND 
{SolrField.NAME_Q.value}:({params.query["value"].split()[0]}*)^10.0' + \ - f' AND {SolrField.NAME_SUGGEST.value}:({params.query["value"].split()[0]}*)^5.0' - - solr_query_params['fl'] = search_solr.base_fields - return search_solr.query(solr_query_params, params.start, params.rows) - - -def business_suggest(query: str, highlight: bool, rows: int) -> List: - """Return the list of business suggestions from Solr from given text.""" - if not rows: - rows = search_solr.default_rows - - # 1st solr query (names) - name_suggestions = search_solr.suggest(query, rows) - - # 2nd solr query (extra names) - extra_name_suggestions = [] - if len(name_suggestions) < rows: - name_select_params = Solr.build_split_query({'value': query}, [SolrField.NAME_SINGLE.value], []) - name_select_params['fl'] = search_solr.base_fields - name_docs = search_solr.query(name_select_params, rows).get('response', {}).get('docs', []) - extra_name_suggestions = [x.get(SolrField.NAME.value).upper() for x in name_docs if x.get(SolrField.NAME.value)] - # remove dups - name_suggestions = name_suggestions + list(set(extra_name_suggestions) - set(name_suggestions)) - query = query.upper() # NOTE: needed for bn/identifier processing too - # highlight - if highlight: - name_suggestions = Solr.highlight_names(query, name_suggestions) - - # 3rd solr query (bns + identifiers) - identifier_suggestions = [] - bn_suggestions = [] - if len(name_suggestions) < rows: - bn_id_params = { - 'q': f'{SolrField.IDENTIFIER_Q.value}:{query} OR {SolrField.BN_Q.value}:{query}', - 'fl': search_solr.base_fields} - bn_id_docs = search_solr.query(bn_id_params, 0, rows).get('response', {}).get('docs', []) - if highlight: - # return list of identifier strings with highlighted query - identifier_suggestions = [ - x.get(SolrField.IDENTIFIER.value).replace(query, f'{query}') - for x in bn_id_docs if query in x.get(SolrField.IDENTIFIER.value)] - # return list of bn strings with highlighted query - bn_suggestions = [ - 
x.get(SolrField.BN.value).replace(query, f'{query}') - for x in bn_id_docs if x.get(SolrField.BN.value) and query in x.get(SolrField.BN.value, '')] - else: - identifier_suggestions = [ - x.get(SolrField.IDENTIFIER.value) for x in bn_id_docs if query in x.get(SolrField.IDENTIFIER.value)] - bn_suggestions = [ - x.get(SolrField.BN.value) for x in bn_id_docs - if x.get(SolrField.BN.value) and query in x.get(SolrField.BN.value, '')] - - # format/combine response - suggestions = [{'type': SolrField.NAME.value, 'value': x} for x in name_suggestions] - suggestions += [{'type': SolrField.IDENTIFIER.value, 'value': x} for x in identifier_suggestions] - suggestions += [{'type': SolrField.BN.value, 'value': x} for x in bn_suggestions] - return suggestions[:rows] - - -def parties_search(params: SearchParams): - """Return the list of parties from Solr that match the query.""" - # build base query - solr_query_params = Solr.build_split_query(params.query, - [SolrField.PARTY_NAME_Q.value, SolrField.PARTY_NAME_STEM_AGRO.value], - [SolrField.PARTY_NAME_Q.value, SolrField.PARENT_NAME_Q.value]) - # facets - solr_query_params['facet'] = 'on' - solr_query_params['json.facet'] = search_solr.party_facets - # filters - filter_q = '' - if params.party_roles: - filter_q = '(' + Solr.build_filter_query(SolrField.PARTY_ROLE.value, [x.lower() for x in params.party_roles]) - if params.legal_types: - filter_str = Solr.build_filter_query(SolrField.PARENT_TYPE.value, [x.upper() for x in params.legal_types]) - filter_q = filter_q + ' AND ' + filter_str if filter_q else '(' + filter_str - if params.states: - filter_str = Solr.build_filter_query(SolrField.PARENT_STATE.value, [x.upper() for x in params.states]) - filter_q = filter_q + ' AND ' + filter_str if filter_q else '(' + filter_str - - filter_q = filter_q + ')' if filter_q else '' - if filter_q: - if solr_query_params.get('fq'): - solr_query_params['fq'] += ' AND ' + filter_q - else: - solr_query_params['fq'] = filter_q - - # boosts for result 
ordering - solr_query_params['defType'] = 'edismax' - solr_query_params['bq'] = f'{SolrField.PARTY_NAME_Q.value}:("{params.query["value"]}"~10)^30.0' + \ - f' AND {SolrField.PARTY_NAME_STEM_AGRO.value}:("{params.query["value"]}"~10)^20.0' + \ - f' AND {SolrField.PARTY_NAME_Q.value}:({params.query["value"].split()[0]}*)^10.0' + \ - f' AND {SolrField.PARTY_NAME_SUGGEST.value}:({params.query["value"].split()[0]}*)^5.0' - - solr_query_params['fl'] = search_solr.party_fields - - return search_solr.query(solr_query_params, params.start, params.rows) diff --git a/search-api/src/search_api/request_handlers/update_solr_handler.py b/search-api/src/search_api/request_handlers/update_solr_handler.py index b8a789d4..4cfb4835 100644 --- a/search-api/src/search_api/request_handlers/update_solr_handler.py +++ b/search-api/src/search_api/request_handlers/update_solr_handler.py @@ -12,32 +12,49 @@ # See the License for the specific language governing permissions and # limitations under the License. """API request handlers for solr updates / resyncs.""" -from time import sleep -from typing import Dict - from flask import current_app from search_api.enums import SolrDocEventStatus, SolrDocEventType from search_api.models import SolrDoc, SolrDocEvent -from search_api.services import search_solr -from search_api.services.solr.solr_docs import BusinessDoc +from search_api.services import business_solr +from search_api.services.business_solr.doc_models import BusinessDoc + + +def update_business_solr(identifiers: list[str], doc_events: list[SolrDocEvent]): + """Update the docs for the entity_ids in the solr instance.""" + businesses: list[BusinessDoc] = [] + for identifier in identifiers: + doc_update = SolrDoc.find_most_recent_by_identifier(identifier) + businesses.append(BusinessDoc(**doc_update.doc)) + try: + # update people + business_solr.create_or_replace_docs(businesses, additive=False) + SolrDocEvent.update_events_status(SolrDocEventStatus.COMPLETE, doc_events) + + except 
Exception as err: # noqa: B902 + # log / update event / pass err + current_app.logger.debug('Failed to UPDATE solr for %s', identifiers) + SolrDocEvent.update_events_status(SolrDocEventStatus.ERROR, doc_events) + raise err -def update_search_solr(identifier: str, event_type: SolrDocEventType) -> Dict[str, str]: - """Update the doc for the identifier in the solr instance.""" - doc_update = SolrDoc.find_most_recent_by_identifier(identifier) - if doc_update: - doc_event = SolrDocEvent(event_type=event_type, solr_doc_id=doc_update.id).save() - try: - # pause for 1 second before update so that solr doesn't get overloaded on large batches of this call - sleep(1) - search_solr.create_or_replace_docs([BusinessDoc(**doc_update.doc)], force=True) - doc_event.event_status = SolrDocEventStatus.COMPLETE - doc_event.save() +def resync_business_solr(identifiers: list[str]): + """Re-apply the docs for the given identifiers.""" + businesses: list[BusinessDoc] = [] + doc_events: list[SolrDocEvent] = [] + for identifier in identifiers: + doc_update = SolrDoc.find_most_recent_by_identifier(identifier) + businesses.append(BusinessDoc(**doc_update.doc)) + # add separate event for resync + doc_event = SolrDocEvent(event_type=SolrDocEventType.RESYNC, solr_doc_id=doc_update.id).save() + doc_events.append(doc_event) + try: + if len(businesses) > 0: + business_solr.create_or_replace_docs(businesses, additive=False) + SolrDocEvent.update_events_status(SolrDocEventStatus.COMPLETE, doc_events) - except Exception as err: # noqa: B902 - # log / update event / pass err - current_app.logger.debug('Failed to %s solr for %s', event_type, identifier) - doc_event.event_status = SolrDocEventStatus.ERROR - doc_event.save() - raise err + except Exception as err: # noqa: B902 + # log / update event / pass err + current_app.logger.debug('Failed to RESYNC solr for %s', identifiers) + SolrDocEvent.update_events_status(SolrDocEventStatus.ERROR, doc_events) + raise err diff --git 
a/search-api/src/search_api/resources/__init__.py b/search-api/src/search_api/resources/__init__.py index 640ae2e9..ef2e0a34 100644 --- a/search-api/src/search_api/resources/__init__.py +++ b/search-api/src/search_api/resources/__init__.py @@ -21,3 +21,8 @@ name='API_V1', path=EndpointVersionPath.API_V1, bps=[bus_bp, meta_bp, ops_bp, purchases_bp, internal_bp]) + +v2_endpoint = VersionEndpoint( # pylint: disable=invalid-name + name='API_V2', + path=EndpointVersionPath.API_V2, + bps=[]) diff --git a/search-api/src/search_api/resources/constants.py b/search-api/src/search_api/resources/constants.py index 44841d2d..d191d1b7 100644 --- a/search-api/src/search_api/resources/constants.py +++ b/search-api/src/search_api/resources/constants.py @@ -21,3 +21,4 @@ class EndpointVersionPath(str, Enum): """Enumerate the endpoint mounts used in the system.""" API_V1 = '/api/v1' + API_V2 = '/api/v2' diff --git a/search-api/src/search_api/resources/utils.py b/search-api/src/search_api/resources/utils.py index b9abd8dd..0eb829df 100644 --- a/search-api/src/search_api/resources/utils.py +++ b/search-api/src/search_api/resources/utils.py @@ -16,8 +16,7 @@ from flask import jsonify, current_app -from search_api.exceptions import ResourceErrorCodes -from search_api.services.authz import user_orgs, is_reg_staff_account, is_sbc_office_account, is_bcol_help +from search_api.exceptions import BaseExceptionE, ResourceErrorCodes # Resource error messages @@ -78,26 +77,24 @@ def account_required_response(): return jsonify({'message': message}), HTTPStatus.BAD_REQUEST -def error_response(status_code, message): - """Build generic error response.""" - return jsonify({'message': message}), status_code - - -def bad_request_response(message): +def bad_request_response(message: str, errors: list[dict[str, str]] = None): """Build generic bad request response.""" - return jsonify({'message': message}), HTTPStatus.BAD_REQUEST + return jsonify({'message': message, 'details': errors or []}), 
HTTPStatus.BAD_REQUEST -def staff_payment_bcol_fas(): - """Build staff payment info error response.""" - message = STAFF_SEARCH_BCOL_FAS.format(code=ResourceErrorCodes.VALIDATION_ERR) - return jsonify({'message': message}), HTTPStatus.BAD_REQUEST - - -def sbc_payment_invalid(): - """Build sbc payment info error response.""" - message = SBC_SEARCH_NO_PAYMENT.format(code=ResourceErrorCodes.VALIDATION_ERR) - return jsonify({'message': message}), HTTPStatus.BAD_REQUEST +def exception_response(exception: BaseExceptionE): + """Build exception error response.""" + current_app.logger.error(repr(exception)) + try: + message = exception.message or 'Error processing request.' + detail = exception.error or repr(exception) + status_code = exception.status_code or HTTPStatus.INTERNAL_SERVER_ERROR + except Exception: # noqa B902; Catch all scenario. + # uncaught exception + message = 'Error processing request.' + detail = repr(exception) + status_code = HTTPStatus.INTERNAL_SERVER_ERROR + return jsonify({'message': message, 'detail': detail}), status_code def sbc_payment_required(message: str, detail: str, error_type: str): @@ -105,52 +102,6 @@ def sbc_payment_required(message: str, detail: str, error_type: str): return jsonify({'message': message, 'detail': detail, 'type': error_type}), HTTPStatus.PAYMENT_REQUIRED -def validation_error_response(errors, cause, additional_msg: str = None): - """Build a schema validation error response.""" - message = ResourceErrorCodes.VALIDATION_ERR + ': ' + cause - details = serialize(errors) - if additional_msg: - details.append('Additional validation: ' + additional_msg) - return jsonify({'message': message, 'detail': details}), HTTPStatus.BAD_REQUEST - - -def db_exception_response(exception, account_id: str, context: str): - """Build a database error response.""" - message = DATABASE.format(code=ResourceErrorCodes.DATABASE_ERR, context=context, account_id=account_id) - current_app.logger.error(message) - return jsonify({'message': message, 
'detail': str(exception)}), HTTPStatus.INTERNAL_SERVER_ERROR - - -def business_exception_response(exception): - """Build business exception error response.""" - current_app.logger.error(str(exception)) - return jsonify({'message': exception.error}), exception.status_code - - -def solr_exception_response(exception): - """Build solr exception error response.""" - current_app.logger.error(exception) - message = SOLR.format(code=ResourceErrorCodes.SOLR_ERR, status=exception.status_code) - status_code = HTTPStatus.INTERNAL_SERVER_ERROR - if exception.status_code == HTTPStatus.SERVICE_UNAVAILABLE: - status_code = HTTPStatus.SERVICE_UNAVAILABLE - return jsonify({'message': message, 'detail': exception.error}), status_code - - -# def pay_exception_response(exception: SBCPaymentException, account_id: str = None): -# """Build pay 402 exception error response.""" -# status = exception.status_code -# message = PAYMENT.format(code=ResourceErrorCodes.PAY_ERR, status=status, account_id=account_id) -# if exception.json_data: -# detail = exception.json_data.get('detail', '') -# err_type = exception.json_data.get('type', '') -# return jsonify({'message': message, 'status_code': status, 'type': err_type, 'detail': detail}),\ -# HTTPStatus.PAYMENT_REQUIRED - -# current_app.logger.error(str(exception)) -# return jsonify({'message': message, 'detail': str(exception)}), HTTPStatus.PAYMENT_REQUIRED - - def default_exception_response(exception): """Build default 500 exception error response.""" current_app.logger.error(exception.with_traceback(None)) @@ -158,11 +109,6 @@ def default_exception_response(exception): return jsonify({'message': message, 'detail': exception.with_traceback(None)}), HTTPStatus.INTERNAL_SERVER_ERROR -def service_exception_response(message): - """Build 500 exception error response.""" - return jsonify({'message': message}), HTTPStatus.INTERNAL_SERVER_ERROR - - def not_found_error_response(item, key): """Build a not found error response.""" message = 
NOT_FOUND.format(code=ResourceErrorCodes.NOT_FOUND_ERR, item=item, key=key) @@ -170,13 +116,6 @@ def not_found_error_response(item, key): return jsonify({'message': message}), HTTPStatus.NOT_FOUND -def duplicate_error_response(message): - """Build a duplicate request error response.""" - err_msg = ResourceErrorCodes.DUPLICATE_ERR + ': ' + message - current_app.logger.info(str(HTTPStatus.CONFLICT.value) + ': ' + message) - return jsonify({'message': err_msg}), HTTPStatus.CONFLICT - - def unauthorized_error_response(account_id): """Build an unauthorized error response.""" message = UNAUTHORIZED.format(code=ResourceErrorCodes.UNAUTHORIZED_ERR, account_id=account_id) @@ -191,53 +130,8 @@ def authorization_expired_error_response(account_id): return jsonify({'message': message}), HTTPStatus.UNAUTHORIZED -def path_param_error_response(param_name): - """Build a bad request param missing error response.""" - message = PATH_PARAM.format(code=ResourceErrorCodes.PATH_PARAM_ERR, param_name=param_name) - current_app.logger.info(str(HTTPStatus.BAD_REQUEST.value) + ': ' + message) - return jsonify({'message': message}), HTTPStatus.BAD_REQUEST - - -def unprocessable_error_response(description): - """Build an unprocessable entity error response.""" - message = f'The {description} request could not be processed (no change/results).' 
- current_app.logger.info(str(HTTPStatus.UNPROCESSABLE_ENTITY.value) + ': ' + message) - return jsonify({'message': message}), HTTPStatus.UNPROCESSABLE_ENTITY - - -def path_data_mismatch_error_response(path_value, description, data_value): - """Build a bad request path param - payload data mismatch error.""" - message = PATH_MISMATCH.format(code=ResourceErrorCodes.DATA_MISMATCH_ERR, path_value=path_value, - description=description, data_value=data_value) - current_app.logger.info(str(HTTPStatus.BAD_REQUEST.value) + ': ' + message) - return jsonify({'message': message}), HTTPStatus.BAD_REQUEST - - def gcp_storage_service_error(detail): """Build a storage servcie error response.""" message = STORAGE.format(code=ResourceErrorCodes.STORAGE_ERR) current_app.logger.info(str(HTTPStatus.INTERNAL_SERVER_ERROR.value) + ': ' + detail) return jsonify({'message': message, 'detail': detail}), HTTPStatus.INTERNAL_SERVER_ERROR - - -def get_account_name(token: str, account_id: str = None): # pylint: disable=too-many-return-statements; added staff - """Lookup the account organization name from the user token with an auth api call.""" - try: - if account_id is not None and is_reg_staff_account(account_id): - return REG_STAFF_DESC - if account_id is not None and is_sbc_office_account(token, account_id): - return SBC_STAFF_DESC - if account_id is not None and is_bcol_help(account_id): - return BCOL_STAFF_DESC - - orgs = user_orgs(token) - if orgs and 'orgs' in orgs and orgs['orgs']: - if (len(orgs['orgs']) == 1 or not account_id or not account_id.isdigit()): - return orgs['orgs'][0]['name'] - for org in orgs['orgs']: - if org['id'] == int(account_id): - return org['name'] - return None - except Exception as err: # pylint: disable=broad-except # noqa F841; - current_app.logger.error('get_account_name failed: ' + str(err)) - return None diff --git a/search-api/src/search_api/resources/v1/businesses/documents/document_request.py 
b/search-api/src/search_api/resources/v1/businesses/documents/document_request.py index 452581d9..dab20e78 100644 --- a/search-api/src/search_api/resources/v1/businesses/documents/document_request.py +++ b/search-api/src/search_api/resources/v1/businesses/documents/document_request.py @@ -85,7 +85,7 @@ def post(business_identifier): # pylint: disable=too-many-return-statements errors = RequestValidator.validate_document_access_request(request_json, account_id, token, role) if errors: - return resource_utils.bad_request_response(errors) + return resource_utils.bad_request_response('Invalid payload', errors) document_access_request = save_request(account_id, business_identifier, request_json) diff --git a/search-api/src/search_api/resources/v1/businesses/search.py b/search-api/src/search_api/resources/v1/businesses/search.py index 4827fbda..2fb80034 100644 --- a/search-api/src/search_api/resources/v1/businesses/search.py +++ b/search-api/src/search_api/resources/v1/businesses/search.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""API endpoints for Search Suggester.""" +"""API endpoints for Search.""" import re from contextlib import suppress from http import HTTPStatus @@ -20,11 +20,10 @@ from flask_cors import cross_origin from search_api.exceptions import SolrException -from search_api.request_handlers import business_search, business_suggest, parties_search -from search_api.request_handlers.search import SearchParams -from search_api.services import search_solr -from search_api.services.solr import Solr -from search_api.services.solr.solr_fields import SolrField +from search_api.services import business_solr +from search_api.services.base_solr.utils import QueryParams, parse_facets, prep_query_str +from search_api.services.business_solr.doc_fields import BusinessField, PartyField +from search_api.services.business_solr.utils import business_search, business_suggest, parties_search import search_api.resources.utils as resource_utils @@ -34,9 +33,9 @@ def _clean_request_args(query: str) -> str: """Put backslash on expected param values that start with ':'.""" expected_params = [ - SolrField.NAME.value, SolrField.IDENTIFIER.value, SolrField.BN.value, - SolrField.PARTY_NAME.value, SolrField.PARENT_NAME.value, SolrField.PARENT_IDENTIFIER.value, - SolrField.PARENT_BN.value] + BusinessField.NAME.value, BusinessField.IDENTIFIER.value, BusinessField.BN.value, + PartyField.PARTY_NAME.value, PartyField.PARENT_NAME.value, PartyField.PARENT_IDENTIFIER.value, + PartyField.PARENT_BN.value] query_cleaner_rgx = r'(::|^)(value|' + '|'.join(expected_params) + ')(::)' return re.sub(query_cleaner_rgx, r'\1\2:\:', query) @@ -55,47 +54,49 @@ def facets(): # pylint: disable=too-many-branches, too-many-locals try: # parse query params query = _clean_request_args(request.args.get('query', '')) + if not query: + return resource_utils.bad_request_response( + 'Invalid args', [{'missing param': "Expected url param 'query'."}]) + query_items = query.split('::') - if not query_items: - return jsonify({'message': 
"Expected url param 'query'."}), HTTPStatus.BAD_REQUEST value = '' name = '' identifier = '' bn = '' # pylint: disable=invalid-name - for item in query_items: with suppress(AttributeError): if param := _parse_url_param('value', item): value = param - elif param := _parse_url_param(SolrField.NAME.value, item): + elif param := _parse_url_param(BusinessField.NAME.value, item): name = param - elif param := _parse_url_param(SolrField.IDENTIFIER.value, item): + elif param := _parse_url_param(BusinessField.IDENTIFIER.value, item): identifier = param - elif param := _parse_url_param(SolrField.BN.value, item): + elif param := _parse_url_param(BusinessField.BN.value, item): bn = param # pylint: disable=invalid-name if not value: - return jsonify({'message': "Expected url param 'query' to have 'value:'."}), HTTPStatus.BAD_REQUEST + return resource_utils.bad_request_response( + 'Invalid args', + [{'query param': "Expected url param 'query' to have 'value:'."}] + ) # clean query values query = { - 'value': Solr.prep_query_str(value), - SolrField.NAME_SINGLE.value: Solr.prep_query_str(name), - SolrField.IDENTIFIER_Q.value: Solr.prep_query_str(identifier), - SolrField.BN_Q.value: Solr.prep_query_str(bn) + 'value': prep_query_str(value, True), + BusinessField.NAME_SINGLE.value: prep_query_str(name), + BusinessField.IDENTIFIER_Q.value: prep_query_str(identifier), + BusinessField.BN_Q.value: prep_query_str(bn) } # parse category params - legal_types = None - states = None + search_categories = {} if categories := request.args.get('categories', '').split('::'): for category in categories: with suppress(AttributeError): - if param := _parse_url_param(SolrField.TYPE.value, category): - legal_types = param.split(',') - elif param := _parse_url_param(SolrField.STATE.value, category): - states = param.split(',') + if param := _parse_url_param(BusinessField.TYPE.value, category): + search_categories[BusinessField.TYPE] = param.upper().split(',') + elif param := 
_parse_url_param(BusinessField.STATE.value, category): + search_categories[BusinessField.STATE] = param.upper().split(',') # TODO: validate legal_type + state - # TODO: add parties filter # parse paging params start = None rows = None @@ -104,34 +105,64 @@ def facets(): # pylint: disable=too-many-branches, too-many-locals start = int(request.args.get('start', None)) rows = int(request.args.get('rows', None)) except ValueError: # catch invalid start/row entry - return {'message': "Expected integer for params: 'start', 'rows'"}, HTTPStatus.BAD_REQUEST + return resource_utils.bad_request_response( + 'Invalid args', + [{'start/row params': "Expected integer for params: 'start', 'rows'"}] + ) + # set doc fields to return + fields = business_solr.business_fields + if request.args.get('parties') == 'true': + fields = business_solr.business_with_parties_fields # create solr search params obj from parsed params - params = SearchParams(query, start, rows, legal_types, states) + params = QueryParams(query=query, + start=start, + rows=rows, + categories=search_categories, + fields=fields, + query_fields={ + BusinessField.NAME_Q: 'parent', + BusinessField.NAME_STEM_AGRO: 'parent', + BusinessField.NAME_SINGLE: 'parent', + BusinessField.NAME_XTRA_Q: 'parent', + BusinessField.BN_Q: 'parent', + BusinessField.IDENTIFIER_Q: 'parent'}, + query_boost_fields={ + BusinessField.NAME_Q: 2, + BusinessField.NAME_STEM_AGRO: 2, + BusinessField.NAME_SINGLE: 2}, + query_fuzzy_fields={ + BusinessField.NAME_Q: {'short': 1, 'long': 2}, + BusinessField.NAME_STEM_AGRO: {'short': 1, 'long': 2}, + BusinessField.NAME_SINGLE: {'short': 1, 'long': 2}}, + child_query={}, + child_categories={}, + child_date_ranges={}) # execute search - results = business_search(params) + results = business_search(params, business_solr) response = { - 'facets': Solr.parse_facets(results), + 'facets': parse_facets(results), 'searchResults': { 'queryInfo': { - 'rows': rows or search_solr.default_rows, 'query': { 'value': 
query['value'], - SolrField.NAME.value: query[SolrField.NAME_SINGLE.value] or '', - SolrField.IDENTIFIER.value: query[SolrField.IDENTIFIER_Q.value] or '', - SolrField.BN.value: query[SolrField.BN_Q.value] or '' + BusinessField.NAME.value: query[BusinessField.NAME_SINGLE.value] or '', + BusinessField.IDENTIFIER.value: query[BusinessField.IDENTIFIER_Q.value] or '', + BusinessField.BN.value: query[BusinessField.BN_Q.value] or '' }, 'categories': { - SolrField.TYPE.value: legal_types or '', - SolrField.STATE.value: states or ''}, - 'start': results.get('response', {}).get('start')}, + BusinessField.TYPE.value: search_categories.get(BusinessField.TYPE, ''), + BusinessField.STATE.value: search_categories.get(BusinessField.STATE, '')}, + 'rows': rows or business_solr.default_rows, + 'start': start or 0 + }, 'totalResults': results.get('response', {}).get('numFound'), 'results': results.get('response', {}).get('docs')}} return jsonify(response), HTTPStatus.OK except SolrException as solr_exception: - return resource_utils.solr_exception_response(solr_exception) + return resource_utils.exception_response(solr_exception) except Exception as default_exception: # noqa: B902 return resource_utils.default_exception_response(default_exception) @@ -142,9 +173,11 @@ def parties(): # pylint: disable=too-many-branches, too-many-return-statements, """Return a list of business/parties results from solr based from the given query.""" try: query = _clean_request_args(request.args.get('query', '')) + if not query: + return resource_utils.bad_request_response( + 'Invalid args', [{'missing param': "Expected url param 'query'."}]) + query_items = query.split('::') - if not query_items: - return jsonify({'message': "Expected url param 'query'."}), HTTPStatus.BAD_REQUEST value = '' party_name = '' parent_name = '' @@ -154,46 +187,48 @@ def parties(): # pylint: disable=too-many-branches, too-many-return-statements, with suppress(AttributeError): if param := _parse_url_param('value', item): 
value = param - elif param := _parse_url_param(SolrField.PARTY_NAME.value, item): + elif param := _parse_url_param(PartyField.PARTY_NAME.value, item): party_name = param - elif param := _parse_url_param(SolrField.PARENT_NAME.value, item): + elif param := _parse_url_param(PartyField.PARENT_NAME.value, item): parent_name = param - elif param := _parse_url_param(SolrField.PARENT_IDENTIFIER.value, item): + elif param := _parse_url_param(PartyField.PARENT_IDENTIFIER.value, item): parent_identifier = param - elif param := _parse_url_param(SolrField.PARENT_BN.value, item): + elif param := _parse_url_param(PartyField.PARENT_BN.value, item): parent_bn = param if not value: - return jsonify({'message': "Expected url param 'query' to have 'value:'."}), HTTPStatus.BAD_REQUEST + return resource_utils.bad_request_response( + 'Invalid args', + [{'query param': "Expected url param 'query' to have 'value:'."}] + ) # clean query values query = { - 'value': Solr.prep_query_str(value), - SolrField.PARTY_NAME_SINGLE.value: Solr.prep_query_str(party_name), - SolrField.PARENT_NAME_SINGLE.value: Solr.prep_query_str(parent_name), - SolrField.PARENT_IDENTIFIER_Q.value: Solr.prep_query_str(parent_identifier), - SolrField.PARENT_BN_Q.value: Solr.prep_query_str(parent_bn) + 'value': prep_query_str(value, True), + PartyField.PARTY_NAME_SINGLE.value: prep_query_str(party_name), + PartyField.PARENT_NAME_SINGLE.value: prep_query_str(parent_name), + PartyField.PARENT_IDENTIFIER_Q.value: prep_query_str(parent_identifier), + PartyField.PARENT_BN_Q.value: prep_query_str(parent_bn) } - # TODO: validate legal_type + state - legal_types = None - states = None - party_roles = None + search_categories = {} if categories := request.args.get('categories', '').split('::'): for category in categories: with suppress(AttributeError): - if param := _parse_url_param(SolrField.PARENT_TYPE.value, category): - legal_types = param.split(',') - elif param := _parse_url_param(SolrField.PARENT_STATE.value, category): - 
states = param.split(',') - elif param := _parse_url_param(SolrField.PARTY_ROLE.value, category): - party_roles = param.lower().split(',') + if param := _parse_url_param(PartyField.PARENT_TYPE.value, category): + search_categories[PartyField.PARENT_TYPE] = param.upper().split(',') + elif param := _parse_url_param(PartyField.PARENT_STATE.value, category): + search_categories[PartyField.PARENT_STATE] = param.upper().split(',') + elif param := _parse_url_param(PartyField.PARTY_ROLE.value, category): + search_categories[PartyField.PARTY_ROLE] = param.lower().split(',') # validate party roles + party_roles = search_categories.get(PartyField.PARTY_ROLE) if not party_roles: return jsonify( - {'message': f"Expected url param 'categories={SolrField.PARTY_ROLE.value}:...'."} + {'message': f"Expected url param 'categories={PartyField.PARTY_ROLE.value}:...'."} ), HTTPStatus.BAD_REQUEST + if [x for x in party_roles if x.lower() not in ['partner', 'proprietor']]: - return jsonify({'message': f"Expected '{SolrField.PARTY_ROLE.value}:' with values 'partner' and/or " + + return jsonify({'message': f"Expected '{PartyField.PARTY_ROLE.value}:' with values 'partner' and/or " + "'proprietor'. 
Other partyRoles are not implemented."}), HTTPStatus.BAD_REQUEST start = None @@ -205,32 +240,53 @@ def parties(): # pylint: disable=too-many-branches, too-many-return-statements, except ValueError: # catch invalid start/row entry return {'message': "Expected integer for params: 'start', 'rows'"}, HTTPStatus.BAD_REQUEST - params = SearchParams(query, start, rows, legal_types, states, party_roles) - results = parties_search(params) + # params = SearchParams(query, start, rows, legal_types, states, party_roles) + params = QueryParams(query=query, + start=start, + rows=rows, + categories=search_categories, + fields=business_solr.party_fields, + query_fields={ + PartyField.PARTY_NAME_Q: 'parent', + PartyField.PARTY_NAME_STEM_AGRO: 'parent', + PartyField.PARTY_NAME_SINGLE: 'parent', + PartyField.PARTY_NAME_XTRA_Q: 'parent'}, + query_boost_fields={ + PartyField.PARTY_NAME_Q: 2, + PartyField.PARTY_NAME_STEM_AGRO: 2, + PartyField.PARTY_NAME_SINGLE: 2}, + query_fuzzy_fields={ + PartyField.PARTY_NAME_Q: {'short': 1, 'long': 2}, + PartyField.PARTY_NAME_STEM_AGRO: {'short': 1, 'long': 2}, + PartyField.PARTY_NAME_SINGLE: {'short': 1, 'long': 2}}, + child_query={}, + child_categories={}, + child_date_ranges={}) + results = parties_search(params, business_solr) response = { - 'facets': Solr.parse_facets(results), + 'facets': parse_facets(results), 'searchResults': { 'queryInfo': { - 'rows': rows or search_solr.default_rows, 'query': { 'value': query['value'], - SolrField.PARTY_NAME.value: query[SolrField.PARTY_NAME_SINGLE.value] or '', - SolrField.PARENT_NAME.value: query[SolrField.PARENT_NAME_SINGLE.value] or '', - SolrField.PARENT_IDENTIFIER.value: query[SolrField.PARENT_IDENTIFIER_Q.value] or '', - SolrField.PARENT_BN.value: query[SolrField.PARENT_BN_Q.value] or '' + PartyField.PARTY_NAME.value: query[PartyField.PARTY_NAME_SINGLE.value] or '', + PartyField.PARENT_NAME.value: query[PartyField.PARENT_NAME_SINGLE.value] or '', + PartyField.PARENT_IDENTIFIER.value: 
query[PartyField.PARENT_IDENTIFIER_Q.value] or '', + PartyField.PARENT_BN.value: query[PartyField.PARENT_BN_Q.value] or '' }, 'categories': { - SolrField.PARENT_TYPE.value: legal_types or '', - SolrField.PARENT_STATE.value: states or '', - SolrField.PARTY_ROLE.value: party_roles or ''}, - 'start': results.get('response', {}).get('start')}, + PartyField.PARENT_TYPE.value: search_categories.get(PartyField.PARENT_TYPE, ''), + PartyField.PARENT_STATE.value: search_categories.get(PartyField.PARENT_STATE, ''), + PartyField.PARTY_ROLE.value: search_categories.get(PartyField.PARTY_ROLE, '')}, + 'rows': rows or business_solr.default_rows, + 'start': start or 0}, 'totalResults': results.get('response', {}).get('numFound'), 'results': results.get('response', {}).get('docs')}} return jsonify(response), HTTPStatus.OK except SolrException as solr_exception: - return resource_utils.solr_exception_response(solr_exception) + return resource_utils.exception_response(solr_exception) except Exception as default_exception: # noqa: B902 return resource_utils.default_exception_response(default_exception) @@ -243,19 +299,16 @@ def suggest(): query = request.args.get('query', None) if not query: return jsonify({'message': "Expected url param 'query'."}), HTTPStatus.BAD_REQUEST - query = Solr.prep_query_str(query) - - rows = None - with suppress(TypeError): - rows = int(request.args.get('rows', None)) - - highlight = bool(request.args.get('highlight', False)) + query = prep_query_str(query) - suggestions = business_suggest(query, highlight, rows) - return jsonify({'queryInfo': {'rows': rows, 'highlight': highlight, 'query': query}, - 'results': suggestions}), HTTPStatus.OK + suggestions = business_suggest(query, business_solr) + return jsonify({ + 'queryInfo': {'rows': 5, 'highlight': False, 'query': query}, + 'results': suggestions, + 'warnings': ['This call is depreciated. 
Please use "/facets" instead.'] + }), HTTPStatus.OK except SolrException as solr_exception: - return resource_utils.solr_exception_response(solr_exception) + return resource_utils.exception_response(solr_exception) except Exception as default_exception: # noqa: B902 return resource_utils.default_exception_response(default_exception) diff --git a/search-api/src/search_api/resources/v1/internal/__init__.py b/search-api/src/search_api/resources/v1/internal/__init__.py index 88f9f359..5da4a785 100644 --- a/search-api/src/search_api/resources/v1/internal/__init__.py +++ b/search-api/src/search_api/resources/v1/internal/__init__.py @@ -14,8 +14,8 @@ """Exposes all of the internal endpoints in Flask-Blueprint style.""" from flask import Blueprint -from .update_solr import bp as update_solr_bp +from .solr import bp as solr_bp bp = Blueprint('INTERNAL', __name__, url_prefix='/internal') # pylint: disable=invalid-name -bp.register_blueprint(update_solr_bp) +bp.register_blueprint(solr_bp) diff --git a/search-api/src/search_api/resources/v1/internal/solr/__init__.py b/search-api/src/search_api/resources/v1/internal/solr/__init__.py new file mode 100644 index 00000000..34d0342e --- /dev/null +++ b/search-api/src/search_api/resources/v1/internal/solr/__init__.py @@ -0,0 +1,25 @@ +# Copyright © 2024 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Exposes all of the internal solr endpoints in Flask-Blueprint style.""" +from flask import Blueprint + +from .command import bp as command_bp +from .imports import bp as import_bp +from .update import bp as update_bp + + +bp = Blueprint('SOLR', __name__, url_prefix='/solr') # pylint: disable=invalid-name +bp.register_blueprint(command_bp) +bp.register_blueprint(import_bp) +bp.register_blueprint(update_bp) diff --git a/search-api/src/search_api/resources/v1/internal/solr/command.py b/search-api/src/search_api/resources/v1/internal/solr/command.py new file mode 100644 index 00000000..581e6ba9 --- /dev/null +++ b/search-api/src/search_api/resources/v1/internal/solr/command.py @@ -0,0 +1,52 @@ +# Copyright © 2023 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""API endpoint for backing up / restoring a solr index.""" +from flask import Blueprint, jsonify, request +from flask_cors import cross_origin + +import search_api.resources.utils as resource_utils +from search_api.exceptions import SolrException +from search_api.services import SYSTEM_ROLE, business_solr +from search_api.utils.auth import jwt + + +bp = Blueprint('COMMAND', __name__, url_prefix='/command') # pylint: disable=invalid-name + + +@bp.post('') +@cross_origin(origin='*') +@jwt.requires_roles([SYSTEM_ROLE]) +def replication_command(): + """Execute a replication command on solr.""" + try: + request_json: dict = request.json + + # validate payload + if not (command := request_json.get('command')): + return resource_utils.bad_request_response('Invalid payload.', + [{'Missing Required Field': 'Expected "command" in payload.'}]) + + valid_commands = ['backup', 'details', 'restore', 'restorestatus'] + if command not in valid_commands: + return resource_utils.bad_request_response('Invalid payload.', + [{'error': f'Expected value to be one of {valid_commands}', + 'path': '/command'}]) + + resp = business_solr.replication(command) + return jsonify(resp.json()), resp.status_code + + except SolrException as solr_exception: + return resource_utils.exception_response(solr_exception) + except Exception as exception: # noqa: B902 + return resource_utils.default_exception_response(exception) diff --git a/search-api/src/search_api/resources/v1/internal/solr/imports.py b/search-api/src/search_api/resources/v1/internal/solr/imports.py new file mode 100644 index 00000000..dd8f8fdb --- /dev/null +++ b/search-api/src/search_api/resources/v1/internal/solr/imports.py @@ -0,0 +1,60 @@ +# Copyright © 2023 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""API endpoint for bulk importing entity records into solr.""" +from http import HTTPStatus + +from flask import Blueprint, current_app, jsonify, request +from flask_cors import cross_origin + +import search_api.resources.utils as resource_utils +from search_api.exceptions import SolrException +from search_api.services import SYSTEM_ROLE, business_solr +from search_api.services.business_solr.doc_models import BusinessDoc +from search_api.utils.auth import jwt + + +bp = Blueprint('IMPORT', __name__, url_prefix='/import') # pylint: disable=invalid-name + + +@bp.put('') +@cross_origin(origin='*') +@jwt.requires_roles([SYSTEM_ROLE]) +def import_businesses(): + """Import businesses into Business SOLR.""" + try: + request_json: dict = request.json + if not (doc_list := request_json.get('businesses', [])): + return resource_utils.bad_request_response('Invalid payload.', ['Expecting required field: "businesses"']) + + if (timeout := int(request_json.get('timeout', '25'))) > 200: + return resource_utils.bad_request_response('Invalid payload.', + ['Expecting desired "timeout" to be under 200.']) + + if request_json.get('type') == 'partial': + # NOTE: raw_docs may be partial data and/or child documents + current_app.logger.debug('Sending partials list to SOLR...') + business_solr.create_or_replace_docs(raw_docs=doc_list, timeout=timeout) + else: + current_app.logger.debug('Translating import payload to entity docs...') + businesses = [BusinessDoc(**e) for e in doc_list] + current_app.logger.debug('Sending business docs to SOLR...') + 
business_solr.create_or_replace_docs(docs=businesses, timeout=timeout, additive=False) + + current_app.logger.debug('Import completed.') + return jsonify({'message': 'Import finished.'}), HTTPStatus.CREATED + + except SolrException as solr_exception: + return resource_utils.exception_response(solr_exception) + except Exception as exception: # noqa: B902 + return resource_utils.default_exception_response(exception) diff --git a/search-api/src/search_api/resources/v1/internal/solr/update/__init__.py b/search-api/src/search_api/resources/v1/internal/solr/update/__init__.py new file mode 100644 index 00000000..b06226ce --- /dev/null +++ b/search-api/src/search_api/resources/v1/internal/solr/update/__init__.py @@ -0,0 +1,126 @@ +# Copyright © 2024 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@bp.put('')
@cross_origin(origin='*')
@jwt.requires_roles([SYSTEM_ROLE])
def update_business():
    """Add/Update business in solr.

    Validates the request payload, stores the parsed business doc in the DB and records an
    UPDATE event for it. Solr is not called here: a separate job applies the stored docs.

    Returns:
        A 202 'Update accepted.' response on success, the bad request response when the
        validator reports errors, or the response built by the exception helpers on failure.
    """
    try:
        request_json: dict = request.json
        errors = RequestValidator.validate_solr_update_request(request_json)
        if errors:
            return resource_utils.bad_request_response('Invalid payload.', errors)

        user = User.get_or_create_user_by_jwt(g.jwt_oidc_token_info)

        business = _parse_business(request_json)
        # commit business. Ensures other flows (i.e. resync) will use the current data
        solr_doc = SolrDoc(doc=asdict(business), identifier=business.identifier, _submitter_id=user.id).save()
        SolrDocEvent(event_type=SolrDocEventType.UPDATE, solr_doc_id=solr_doc.id).save()
        # SOLR update will be triggered by job (does a frequent bulk update to solr)

        return jsonify({'message': 'Update accepted.'}), HTTPStatus.ACCEPTED

    except SolrException as solr_exception:
        return resource_utils.exception_response(solr_exception)
    except Exception as exception:  # noqa: B902
        return resource_utils.default_exception_response(exception)


def _parse_business(request_json: dict) -> BusinessDoc:
    """Return the solr doc for the json data.

    Args:
        request_json: the update payload. Reads ``business`` (identifier, legalType, legalName,
            state and the optional taxId / goodStanding) and the optional ``parties`` list.

    Returns:
        The BusinessDoc for the payload, with one PartyDoc per entry in ``parties`` attached
        when there are any.

    Raises:
        KeyError: if a required key is missing (the payload is expected to be validated first).
    """
    def needs_bc_prefix(identifier: str, legal_type: str) -> bool:
        """Return if the identifier should have the BC prefix or not."""
        numbers_only_rgx = r'^[0-9]+$'
        # TODO: get legal types from shared enum
        # bool(): re.search returns a Match or None, but this helper promises a real boolean
        return legal_type in ['BEN', 'BC', 'CC', 'ULC'] and bool(re.search(numbers_only_rgx, identifier))

    def get_party_name(officer: dict[str, str]) -> str:
        """Return the parsed name of the party in the given doc info."""
        if officer.get('organizationName'):
            return officer['organizationName'].strip()
        # person: join whichever name parts were given with single spaces
        name_keys = ('firstName', 'middleInitial', 'lastName')
        return ' '.join(officer[key].strip() for key in name_keys if officer.get(key)).strip()

    business_info = request_json.get('business')
    party_info = request_json.get('parties')

    # add new base doc
    identifier = business_info['identifier']
    legal_type = business_info['legalType']
    if needs_bc_prefix(identifier, legal_type):
        identifier = f'BC{identifier}'

    business_doc = BusinessDoc(
        bn=business_info.get('taxId'),
        id=identifier,
        identifier=identifier,
        legalType=legal_type,
        name=business_info['legalName'].strip(),
        status=business_info['state'],
        goodStanding=business_info.get('goodStanding'))

    if party_info:
        # add party docs to base doc (each one repeats the parent fields it is searched with)
        business_doc.parties = [
            PartyDoc(
                id=f"{business_doc.identifier}_{party['officer']['id']}",
                parentBN=business_doc.bn,
                parentIdentifier=business_doc.identifier,
                parentLegalType=business_doc.legalType,
                parentName=business_doc.name,
                parentStatus=business_doc.status,
                partyName=get_party_name(party['officer']),
                partyRoles=[role['roleType'].lower() for role in party['roles']],
                partyType=party['officer']['partyType']
            )
            for party in party_info
        ]

    return business_doc
@bp.post('')
@cross_origin(origin='*')
@jwt.requires_roles([SYSTEM_ROLE])
def resync_solr():
    """Resync solr docs from the given date or identifiers given.

    Reads ``minutesOffset`` and/or ``identifiers`` from the JSON body. A usable ``minutesOffset``
    takes precedence: the identifiers updated within that many minutes are looked up and resynced.
    Otherwise the given ``identifiers`` are resynced.

    Returns:
        A 201 'Resync successful.' response, the bad request response when neither field is
        usable, or the response built by the exception helpers on failure.
    """
    try:
        request_json = request.json
        if not isinstance(request_json, dict):
            # e.g. a JSON null / list body: there are no fields to read
            request_json = {}
        from_datetime = datetime.utcnow()
        minutes_offset = request_json.get('minutesOffset', None)
        identifiers_to_resync = request_json.get('identifiers', None)
        if not minutes_offset and not identifiers_to_resync:
            return resource_utils.bad_request_response('Missing required field "minutesOffset" or "identifiers".')
        try:
            minutes_offset = float(minutes_offset)
        except (TypeError, ValueError):
            if not identifiers_to_resync:
                return resource_utils.bad_request_response(
                    'Invalid value for field "minutesOffset". Expecting a number.')
            # identifiers were given: ignore the unusable offset instead of failing on it below
            minutes_offset = None

        if minutes_offset:
            # get all updates since the from_datetime
            resync_date = from_datetime - timedelta(minutes=minutes_offset)
            identifiers_to_resync = SolrDoc.get_updated_identifiers_after_date(resync_date)

        if identifiers_to_resync:
            current_app.logger.debug(f'Resyncing: {identifiers_to_resync}')
            resync_business_solr(identifiers_to_resync)
        else:
            current_app.logger.debug('No records to resync.')

        return jsonify({'message': 'Resync successful.'}), HTTPStatus.CREATED

    except SolrException as solr_exception:
        return resource_utils.exception_response(solr_exception)
    except Exception as exception:  # noqa: B902
        return resource_utils.default_exception_response(exception)
@bp.get('')
@cross_origin(origin='*')
def sync_solr():
    """Sync docs in the DB that haven't been applied to SOLR yet.

    Collects the UPDATE events that are PENDING or in ERROR (capped by MAX_BATCH_UPDATE_NUM),
    resolves the identifier of each event's doc and hands the batch to the update handler.
    """
    try:
        unapplied_statuses = [SolrDocEventStatus.PENDING, SolrDocEventStatus.ERROR]
        pending_update_events = SolrDocEvent.get_events_by_status(
            statuses=unapplied_statuses,
            event_type=SolrDocEventType.UPDATE,
            limit=current_app.config.get('MAX_BATCH_UPDATE_NUM'))

        identifiers_to_sync = []
        for event in pending_update_events:
            solr_doc = SolrDoc.get_by_id(event.solr_doc_id)
            identifiers_to_sync.append(solr_doc.identifier)

        current_app.logger.debug(f'Syncing: {identifiers_to_sync}')
        if identifiers_to_sync:
            update_business_solr(identifiers_to_sync, pending_update_events)
        return jsonify({'message': 'Sync successful.'}), HTTPStatus.OK

    except SolrException as solr_exception:
        return resource_utils.exception_response(solr_exception)
    except Exception as exception:  # noqa: B902
        return resource_utils.default_exception_response(exception)


@bp.get('/heartbeat')
@cross_origin(origin='*')
def sync_follower_heartbeat():  # pylint: disable=too-many-branches,too-many-statements,too-many-return-statements
    """Verify the solr follower instance is serving updated/synced records.

    Checks, in order: follower polling is enabled, the last replication is within
    LAST_REPLICATION_THRESHOLD hours, and (when there is a completed update from the last hour
    that is at least 5 minutes old and still the latest doc for its identifier) that the doc
    served by the follower matches it. Any failed check returns a 500 response.
    """
    try:
        now = datetime.now(UTC)
        healthy_body = {'message': 'Follower synchronization is healthy.'}

        if business_solr.follower_url == business_solr.leader_url:
            # leader and follower share a url: there is no separate follower to verify
            return jsonify(healthy_body), HTTPStatus.OK

        # verify the follower core details
        replication_resp = business_solr.replication('details', False)
        details: dict = replication_resp.json()['details']
        # NOTE: replace tzinfo needed because strptime %Z is not working as documented
        # - issue: accepts the tz in the string but doesn't add it to the datetime obj
        replicated_at = datetime.strptime(details['follower']['indexReplicatedAt'], '%a %b %d %H:%M:%S %Z %Y')
        last_replication = replicated_at.replace(tzinfo=UTC)
        current_app.logger.debug(f'Last replication was at {last_replication.isoformat()}')

        errors = []
        # verify polling is active
        if details['follower']['isPollingDisabled'] == 'true':
            errors.append('Follower polling disabled when it should be enabled.')

        # verify last_replication datetime is within a reasonable timeframe
        max_replication_age = timedelta(hours=current_app.config.get('LAST_REPLICATION_THRESHOLD'))
        if last_replication + max_replication_age < now:
            # its been too long since a replication. Log / return error
            errors.append('Follower last replication datetime is longer than expected.')

        if errors:
            current_app.logger.error(errors)
            return jsonify({'errors': errors}), HTTPStatus.INTERNAL_SERVER_ERROR

        # verify an update that happened in the last hour (if there is one)
        recent_events = SolrDocEvent.get_events_by_status(statuses=[SolrDocEventStatus.COMPLETE],
                                                          event_type=SolrDocEventType.UPDATE,
                                                          start_date=now - timedelta(minutes=60),
                                                          limit=2)

        if not recent_events or recent_events[0].event_date + timedelta(minutes=5) > now:
            # either no updates to check or the event may not be reflected in the search yet
            current_app.logger.debug('No update events to verify in the last hour.')
            return jsonify(healthy_body), HTTPStatus.OK

        # there was an update in the last hour and it is at least 5 minutes old
        doc_obj_to_verify = SolrDoc.get_by_id(recent_events[0].solr_doc_id)
        latest_doc_obj = SolrDoc.find_most_recent_by_identifier(doc_obj_to_verify.identifier)
        if latest_doc_obj.id != doc_obj_to_verify.id:
            # there's been an update since so skip verification of this event
            current_app.logger.debug('Update event has been altered since. Skipping verification.')
            return jsonify(healthy_body), HTTPStatus.OK

        current_app.logger.debug(f'Verifying sync for: {doc_obj_to_verify.identifier}...')
        doc: dict = doc_obj_to_verify.doc
        response = business_solr.query({'query': f"id:{doc['id']}", 'fields': '*, [child]'})
        served_docs = response['response']['docs']
        business: dict = served_docs[0] if served_docs else {}

        # verify important elements match the update (stop at the first field that differs)
        fields = [
            BusinessField.NAME, BusinessField.IDENTIFIER, BusinessField.TYPE,
            BusinessField.STATE, BusinessField.GOOD_STANDING, BusinessField.BN
        ]
        mismatched_field = next(
            (field for field in fields if business.get(field.value) != doc.get(field.value)), None)

        if mismatched_field is not None:
            # data returned from the follower does not match the update or is not there
            current_app.logger.debug(f'{mismatched_field} mismatch')
            current_app.logger.debug(f'Business expected: {doc}')
            current_app.logger.debug(f'Business served: {business}')
            message = f'Follower failed to update entity: {doc_obj_to_verify.identifier}.'
            current_app.logger.error(message)
            return jsonify({'message': message}), HTTPStatus.INTERNAL_SERVER_ERROR

        current_app.logger.debug(f'Sync verified for: {doc_obj_to_verify.identifier}')
        return jsonify(healthy_body), HTTPStatus.OK

    except SolrException as solr_exception:
        return resource_utils.exception_response(solr_exception)
    except Exception as exception:  # noqa: B902
        return resource_utils.default_exception_response(exception)
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""API endpoint for updating/adding business record in solr.""" -import re -from dataclasses import asdict -from http import HTTPStatus -from typing import Dict - -from datetime import datetime, timedelta -from flask import Blueprint, current_app, g, jsonify, request -from flask_cors import cross_origin - -import search_api.resources.utils as resource_utils -from search_api.enums import SolrDocEventType -from search_api.exceptions import SolrException -from search_api.models import SolrDoc, User -from search_api.request_handlers import update_search_solr -from search_api.services import is_system -from search_api.services.solr.solr_docs import BusinessDoc, PartyDoc -from search_api.services.validator import RequestValidator -from search_api.utils.auth import jwt - - -bp = Blueprint('UPDATE', __name__, url_prefix='/solr/update') # pylint: disable=invalid-name - - -@bp.put('') -@cross_origin(origin='*') -@jwt.requires_auth -def update_solr(): - """Add/Update business in solr.""" - try: - if not is_system(jwt): - # system only endpoint - return jsonify({'message': 'Not authorized to update a solr doc.'}), HTTPStatus.UNAUTHORIZED - - request_json = request.json - errors = RequestValidator.validate_solr_update_request(request_json) - if errors: - return resource_utils.bad_request_response(errors) - - user = User.get_or_create_user_by_jwt(g.jwt_oidc_token_info) - - solr_doc = _prepare_data(request_json) - # commit so that other flows will take this record as most recent for this identifier - solr_doc_update = SolrDoc(doc=asdict(solr_doc), 
identifier=solr_doc.identifier, _submitter_id=user.id).save() - - update_search_solr(solr_doc_update.identifier, SolrDocEventType.UPDATE) - return jsonify({'message': 'Update successful'}), HTTPStatus.OK - - except SolrException as solr_exception: - return resource_utils.solr_exception_response(solr_exception) - except Exception as default_exception: # noqa: B902 - return resource_utils.default_exception_response(default_exception) - - -@bp.post('/resync') -@cross_origin(origin='*') -def resync_solr(): - """Resync solr docs from the given date.""" - try: - request_json = request.json - from_datetime = datetime.utcnow() - minutes_offset = request_json.get('minutesOffset', None) - identifiers_to_resync = request_json.get('identifiers', None) - if not minutes_offset and not identifiers_to_resync: - return resource_utils.bad_request_response('Missing required field "minutesOffset" or "identifiers".') - try: - minutes_offset = float(minutes_offset) - except: # pylint: disable=bare-except # noqa F841; - if not identifiers_to_resync: - return resource_utils.bad_request_response( - 'Invalid value for field "minutesOffset". 
Expecting a number.') - - if minutes_offset: - # get all updates since the from_datetime - resync_date = from_datetime - timedelta(minutes=minutes_offset) - identifiers_to_resync = SolrDoc.get_updated_identifiers_after_date(resync_date) - - current_app.logger.debug(f'Resyncing: {identifiers_to_resync}') - # update docs - for identifier in identifiers_to_resync: - try: - update_search_solr(identifier, SolrDocEventType.RESYNC) - except SolrException: - # log error so that ops can resync the business without redoing the whole batch - current_app.logger.error('Failed to resync %s', identifier) - - return jsonify({'message': 'Resync successful.'}), HTTPStatus.CREATED - - except SolrException as solr_exception: - return resource_utils.solr_exception_response(solr_exception) - except Exception as default_exception: # noqa: B902 - return resource_utils.default_exception_response(default_exception) - - -def _prepare_data(request_json: Dict) -> BusinessDoc: - """Return the solr doc for the json data.""" - def needs_bc_prefix(identifier: str, legal_type: str) -> bool: - """Return if the identifier should have the BC prefix or not.""" - numbers_only_rgx = r'^[0-9]+$' - # TODO: get legal types from shared enum - return legal_type in ['BEN', 'BC', 'CC', 'ULC'] and re.search(numbers_only_rgx, identifier) - - def get_party_name(officer: Dict[str, str]) -> str: - """Return the parsed name of the party in the given doc info.""" - if officer.get('organizationName'): - return officer['organizationName'].strip() - person_name = '' - if officer.get('firstName'): - person_name += officer['firstName'].strip() - if officer.get('middleInitial'): - person_name += ' ' + officer['middleInitial'].strip() - if officer.get('lastName'): - person_name += ' ' + officer['lastName'].strip() - return person_name.strip() - - business_info = request_json.get('business') - party_info = request_json.get('parties') - - # add new base doc - identifier = business_info['identifier'] - legal_type = 
business_info['legalType'] - business_doc = BusinessDoc( - bn=business_info.get('taxId'), - identifier=f'BC{identifier}' if needs_bc_prefix(identifier, legal_type) else identifier, - legalType=legal_type, - name=business_info['legalName'].strip(), - status=business_info['state'], - goodStanding=business_info.get('goodStanding')) - - if party_info: - party_list = [] - # add party doc to base doc - for party in party_info: - party_doc = PartyDoc( - parentBN=business_doc.bn, - parentLegalType=business_doc.legalType, - parentName=business_doc.name, - parentStatus=business_doc.status, - partyName=get_party_name(party['officer']), - partyRoles=[x['roleType'].lower() for x in party['roles']], - partyType=party['officer']['partyType'] - ) - party_list.append(party_doc) - - if party_list: - business_doc.parties = party_list - # add doc to updates table - return business_doc diff --git a/search-api/src/search_api/resources/v1/meta.py b/search-api/src/search_api/resources/v1/meta.py index b52c008b..0723a24b 100644 --- a/search-api/src/search_api/resources/v1/meta.py +++ b/search-api/src/search_api/resources/v1/meta.py @@ -16,7 +16,6 @@ Currently this only provides API versioning information """ from flask import jsonify, Blueprint -from registry_schemas import __version__ as registry_schemas_version from search_api.utils.run_version import get_run_version @@ -28,4 +27,4 @@ def info(): """Return a JSON object with meta information about the Service.""" version = get_run_version() - return jsonify(API=f'search_api/{version}', SCHEMAS=f'registry_schemas/{registry_schemas_version}') + return jsonify(API=f'search_api/{version}') diff --git a/search-api/src/search_api/services/__init__.py b/search-api/src/search_api/services/__init__.py index f204bab0..c333f2ae 100644 --- a/search-api/src/search_api/services/__init__.py +++ b/search-api/src/search_api/services/__init__.py @@ -14,14 +14,14 @@ """This module wraps the calls to external services used by the API.""" from .authz import 
BASIC_USER, SBC_STAFF, STAFF_ROLE, SYSTEM_ROLE, get_role, is_staff, is_system +from .business_solr import BusinessSolr from .flags import Flags from .queue import Queue -from .solr import Solr flags = Flags() # pylint: disable=invalid-name; shared variables are lower case by Flask convention. -# TODO: uncomment after testing with running gcp service +business_solr = BusinessSolr('SOLR_SVC_BUS') + queue = Queue() # pylint: disable=invalid-name; shared variables are lower case by Flask convention. -search_solr = Solr() # pylint: disable=invalid-name; shared variables are lower case by Flask convention. # TODO: uncomment after testing with running gcp service # storage = GoogleStorageService() # pylint: disable=invalid-name; shared variables are lower case by Flask convention. diff --git a/search-api/src/search_api/services/base_solr/__init__.py b/search-api/src/search_api/services/base_solr/__init__.py new file mode 100644 index 00000000..bc0a5ddd --- /dev/null +++ b/search-api/src/search_api/services/base_solr/__init__.py @@ -0,0 +1,204 @@ +# Copyright © 2023 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This module wraps the solr classes/fields for using registries search solr.""" + + +# Copyright © 2023 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class Solr:
    """Wrapper class around the solr instance.

    Holds the leader/follower urls and cores read from the Flask config (keys prefixed with
    ``config_prefix``) and the url templates for the solr handlers it calls.
    """

    def __init__(self, config_prefix: str, app: Flask = None):
        """Initialize the solr class.

        Args:
            config_prefix: prefix of the config keys read in ``init_app``.
            app: optional Flask app; when given ``init_app`` is called right away.
        """
        self.app = None
        self.config_prefix = config_prefix

        # solr cores
        self.follower_core = None
        self.leader_core = None
        # solr urls
        self.follower_url = None
        self.leader_url = None

        self.default_start = 0
        self.default_rows = 10

        # base urls ({url} / {core} are filled in by call_solr)
        self.reload_url = '{url}/admin/cores?action=RELOAD&core={core}'
        self.replication_url = '{url}/{core}/replication'
        self.search_url = '{url}/{core}/query'
        self.suggest_url = '{url}/{core}/suggest'
        self.synonyms_url = '{url}/{core}/schema/analysis/synonyms'
        self.update_url = '{url}/{core}/update?commit=true&overwrite=true&wt=json'
        self.bulk_update_url = '{url}/{core}/update?overwrite=true&wt=json'

        if app:
            self.init_app(app)

    def init_app(self, app: Flask):
        """Initialize the Solr environment."""
        self.app = app

        # NOTE: for a single core implementation set leader/follower cores the same
        self.leader_core = app.config.get(f'{self.config_prefix}_LEADER_CORE')
        self.follower_core = app.config.get(f'{self.config_prefix}_FOLLOWER_CORE')
        # NOTE: for a single node implementation set the leader/follower urls the same
        self.leader_url = app.config.get(f'{self.config_prefix}_LEADER_URL')
        self.follower_url = app.config.get(f'{self.config_prefix}_FOLLOWER_URL')

    # pylint: disable=too-many-arguments
    def call_solr(self,
                  method: str,
                  query: str,
                  params: dict = None,
                  json_data: dict = None,
                  xml_data: str = None,
                  leader=True,
                  timeout=25) -> Response:
        """Call solr instance with given params.

        Args:
            method: 'GET', 'POST' or 'PUT'.
            query: url template with ``{url}`` and ``{core}`` placeholders.
            params: query string params (GET only).
            json_data: json body (POST / PUT).
            xml_data: xml body (POST, used when no json_data is given).
            leader: call the leader when True, the follower otherwise.
            timeout: request timeout in seconds.

        Returns:
            The solr response (always a 200, anything else raises).

        Raises:
            SolrException: on a connection error (504), a non 200 response (its status code and
                solr error message when available) or any other failure (500).
        """
        base_url = self.leader_url if leader else self.follower_url
        core = self.leader_core if leader else self.follower_core
        url = query.format(url=base_url, core=core)
        retries = Retry(total=1,
                        backoff_factor=1,
                        status_forcelist=[500, 502, 503, 504],
                        allowed_methods=['GET', 'POST'])

        response = None
        try:
            # the context manager closes the session (and its connections) once the call is done
            with Session() as session:
                session.mount(url, HTTPAdapter(max_retries=retries))
                if method == 'GET':
                    response = session.get(url, params=params, timeout=timeout)
                elif method == 'POST' and json_data:
                    response = session.post(url=url, json=json_data, timeout=timeout)
                elif method == 'PUT' and json_data:
                    response = session.put(url=url, json=json_data, timeout=timeout)
                elif method == 'POST' and xml_data:
                    headers = {'Content-Type': 'application/xml'}
                    response = session.post(url=url, data=xml_data, headers=headers, timeout=timeout)
                else:
                    current_app.logger.debug(
                        f'Invalid function params: {method}, {query}, {params}, {json_data}, {xml_data}')
                    raise Exception('Invalid params given.')  # pylint: disable=broad-exception-raised
            # check for error
            if response.status_code != HTTPStatus.OK:
                error = response.json().get('error', {}).get('msg', 'Error handling Solr request.')
                raise Exception(error)  # pylint: disable=broad-exception-raised;

            return response

        except SolrConnectionError as err:
            current_app.logger.debug(err)
            raise SolrException(
                error='Connection error while handling Solr request.',
                status_code=HTTPStatus.GATEWAY_TIMEOUT) from err
        except Exception as err:  # noqa B902
            current_app.logger.debug(err)
            msg = 'Error handling Solr request.'
            status_code = HTTPStatus.INTERNAL_SERVER_ERROR
            # response is still None when the failure happened before / while sending the request
            if response is not None:
                current_app.logger.debug(response)
                status_code = response.status_code
                # the body is not guaranteed to be json: keep the default message in that case
                with suppress(Exception):
                    msg = response.json().get('error', {}).get('msg', msg)
            current_app.logger.debug(msg)
            raise SolrException(error=msg, status_code=status_code) from err

    def create_or_update_synonyms(self, synonym_type: BaseEnum, synonyms: dict[str, list[str]]):
        """Create or update the synonyms of the given type in the core."""
        return self.call_solr('PUT', f'{self.synonyms_url}/{synonym_type.value}', json_data=synonyms, timeout=180)

    def delete_all_docs(self):
        """Delete all solr docs from the core."""
        # delete-by-query command of the solr xml update format
        payload = '<delete><query>*:*</query></delete>'
        return self.call_solr('POST', self.update_url, xml_data=payload, timeout=60)

    def delete_docs(self, unique_keys: list[str]):
        """Delete solr docs from the core.

        Args:
            unique_keys: ids of the docs to delete (upper cased before matching).

        Raises:
            SolrException: also when no keys are given (call_solr rejects the empty payload).
        """
        # TODO: set unique key in init
        id_query = ' OR '.join(f'id:{key.upper()}' for key in unique_keys or [])
        payload = f'<delete><query>{id_query}</query></delete>' if id_query else ''
        return self.call_solr('POST', self.update_url, xml_data=payload, timeout=60)

    def query(self, payload: dict[str, str], start: int = None, rows: int = None) -> dict:
        """Return a list of solr docs from the solr query handler for the given params.

        NOTE: sets 'offset' and 'limit' on the given payload dict before sending it to the follower.
        """
        payload['offset'] = start if start else self.default_start
        payload['limit'] = rows if rows else self.default_rows
        response = self.call_solr('POST', self.search_url, json_data=payload, leader=False)
        return response.json()

    def suggest(self, query: str, rows: int, build: bool = False) -> list[str]:
        """Return a list of suggestions from the solr suggest handler for the given query."""
        suggest_params = {
            'suggest.q': query,
            'suggest.count': rows if rows else self.default_rows,
            'suggest.build': str(build).lower()
        }
        # call solr
        response = self.call_solr('GET', self.suggest_url, suggest_params)
        # parse response
        suggestions = response.json() \
            .get('suggest', {}).get('name', {}).get(query, {}).get('suggestions', [])
        return [x.get('term', '').upper() for x in suggestions]  # i.e. returning list = ['COMPANY 1', 'COMPANY 2', ...]

    def reload_core(self):
        """Reload the solr core."""
        current_app.logger.info('Reloading core...')
        reload = self.call_solr(method='GET', query=self.reload_url)
        current_app.logger.info('Core reloaded.')
        return reload

    def replication(self, command: str, leader=True):
        """Send a replication command to solr."""
        current_app.logger.info(f'Sending {command} command to {"leader" if leader else "follower"}')
        resp = self.call_solr(method='GET',
                              query=self.replication_url,
                              params={'command': command},
                              leader=leader)
        current_app.logger.info(f'{command} command executed.')
        return resp
- -"""Create the schema manager to be initialized inThe flask create_app.""" -from registry_schemas.flask import SchemaServices - - -rsbc_schemas = SchemaServices() # pylint: disable=invalid-name - -__all__ = ('rsbc_schemas') # pylint: disable=superfluous-parens,invalid-all-format +"""This module manages helpful util functions for using the solr service.""" +from .formatting_helpers import parse_facets, prep_query_str +from .query_builder import QueryBuilder +from .query_params import QueryParams diff --git a/search-api/src/search_api/services/base_solr/utils/formatting_helpers.py b/search-api/src/search_api/services/base_solr/utils/formatting_helpers.py new file mode 100644 index 00000000..9304e1d9 --- /dev/null +++ b/search-api/src/search_api/services/base_solr/utils/formatting_helpers.py @@ -0,0 +1,62 @@ +# Copyright © 2023 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def parse_facets(facet_data: dict) -> dict:
    """Return formatted solr facet response data.

    Args:
        facet_data: solr response; its 'facets' entry maps each facet name to its 'buckets'
            (the top level 'count' entry is skipped).

    Returns:
        ``{'fields': {<facet>: [{'value': .., 'count': .., 'parentCount': ..}, ...]}}`` where
        'parentCount' is only set for buckets that have a truthy 'by_parent' value.
    """
    facets = {}
    for category, info in facet_data.get('facets', {}).items():
        if category == 'count':
            continue
        facets[category] = []
        for item in info.get('buckets', []):
            new_category = {'value': item['val'], 'count': item['count']}
            if parent_count := item.get('by_parent', None):
                new_category['parentCount'] = parent_count
            facets[category].append(new_category)

    return {'fields': facets}


def prep_query_str(query: str, replace_specials=False) -> str:
    r"""Return the query string prepped for solr call (more advanced method).

    Rules:
        - no doubles: &,+
        - escape beginning: +,-,/,!
        - escape everywhere: ",:,[,],~,<,>,?
        - remove: (,),^,{,},|,\
        - lowercase: all
        - whitespace: runs collapsed to a single space, ends trimmed

    Args:
        query: the raw user query (None / empty returns '').
        replace_specials: when True '&' / '+' become ' and ' and a dash between two
            non-space characters is spaced out ('a-b' -> 'a - b').
    """
    if not query:
        return ''

    rmv_doubles = r'([&+]){2,}'
    rmv_all = r'([()^{}|\\])'
    esc_begin = r'(^|\s)([+\-/!])'
    esc_all = r'([:~<>?\"\[\]])'
    special_and = r'([&+])'
    # lookarounds: the characters around the dash are kept and 'a-b-c' handles both dashes
    special_dash = r'(?<=\S)-(?=\S)'

    query = re.sub(rmv_doubles, r'\1', query.lower())
    query = re.sub(rmv_all, '', query)
    if replace_specials:
        query = re.sub(special_and, r' and ', query)
        query = re.sub(special_dash, r' - ', query)
    query = re.sub(esc_begin, r'\1\\\2', query)
    query = re.sub(esc_all, r'\\\1', query)
    # collapse the whitespace runs left by the removals / replacements above
    return ' '.join(query.split())
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Manages common solr query building methods.""" +import re + +from search_api.utils.base import BaseEnum + + +class QueryBuilder: + """Manages shared query building code.""" + + identifier_field_values = None + pre_child_filter_clause = None + + def __init__(self, identifier_field_values: list[str], unique_parent_field: BaseEnum): + """Initialize the solr class.""" + self.identifier_field_values = identifier_field_values + self.pre_child_filter_clause = "{!parent which = '-_nest_path_:* " + unique_parent_field.value + ":*'}" + + def create_clause(self, field_value: str, term: str, is_child=False) -> str: + """Return the query clause for the field and term.""" + corp_prefix_regex = r'(^[aA-zZ]+)[0-9]+$' + + search_field = field_value + if is_child: + search_field = self.pre_child_filter_clause + search_field + + if field_value in self.identifier_field_values and (identifier := re.search(corp_prefix_regex, term)): + prefix = identifier.group(1) + no_prefix_term = term.replace(prefix, '', 1) + + return f'({search_field}:"{no_prefix_term}" AND {search_field}:"{prefix.upper()}")' + + return f'{search_field}:{term}' + + def build_child_query(self, child_query: dict[str, str]) -> str | None: + """Return the child query fq.""" + # add filter clauses for child query items + child_q = '' + for key in child_query: + if not child_query[key]: + continue + + terms = child_query[key].split() + if not child_q: + child_q = self.create_clause(key, terms[0], True) + else: + child_q += f' AND {self.create_clause(key, terms[0], True)}' + + for term in terms[1:]: + child_q += 
f' AND {self.create_clause(key, term, True)}' + + if not child_q: + return None + + return f'({child_q})' + + def build_facet_query(self, + field: BaseEnum, + values: list[str], is_nested: bool = False) -> str: + """Return the facet filter clause for the given params.""" + filter_q = f'{field.value}:("{values[0]}"' + if is_nested: + filter_q = self.pre_child_filter_clause + f'{field.value}:"{values[0]}"' + for val in values[1:]: + if is_nested: + filter_q += f' OR {field.value}: "{val}"' + else: + filter_q += f' OR "{val}"' + if not is_nested: + filter_q += ')' + return filter_q + + def build_base_query(self, + query: dict[str, str], # pylint: disable=too-many-arguments,too-many-branches + fields: dict[BaseEnum, str], + boost_fields: dict[BaseEnum, int], + fuzzy_fields: dict[BaseEnum, dict[str, int]]) -> dict[str, list[str]]: + """Return a solr query with filters for each subsequent term.""" + terms = query['value'].split() + + query_clause = '' + for term in terms: + # each term only needs to match one of the given fields, but all terms must match at least 1 + term_clause = '' + for field, level in fields.items(): + field_clause = self.create_clause(field.value, term, level == 'child') + pre_boost_clause = field_clause + # add boost + if field in boost_fields: + field_clause += f'^{boost_fields[field]}' + + term_clause = self.join_clause(term_clause, field_clause, 'OR') + # add fuzzy matching + if field in fuzzy_fields \ + and (fuzzy_str := self.get_fuzzy_str(term, + fuzzy_fields[field]['short'], + fuzzy_fields[field]['long'])): + # add another with fuzzy (this one will give a lower score on a hit if the original has a boost) + term_clause = self.join_clause(term_clause, f'{pre_boost_clause}{fuzzy_str}', 'OR') + + query_clause = self.join_clause(query_clause, f'({term_clause})', 'AND') + + # extra filters + filters = [] + for key in query: + if key in ['value'] or not query[key]: + continue + terms = query[key].split() + for term in terms: + 
filters.append(self.create_clause(key, term)) + + return {'query': query_clause, 'filter': filters} + + @staticmethod + def build_facet(field: BaseEnum, is_nested: bool) -> dict[str, dict]: + """Return the facet dict for the field.""" + facet = {field.value: {'type': 'terms', 'field': field.value}} + if is_nested: + facet[field.value]['domain'] = {'blockChildren': '{!v=$parents}'} + facet[field.value]['facet'] = {'by_parent': 'uniqueBlock({!v=$parents})'} + + return facet + + @staticmethod + def get_fuzzy_str(term: str, short: int, long: int) -> str: + """Return the fuzzy string for the term.""" + if len(term) < 4: + return '' + if len(term) < 7: + return f'~{short}' + return f'~{long}' + + @staticmethod + def join_clause(current_clause: str, new_clause: str, join_str: str): + """Return the current clause added with the new clause.""" + if current_clause: + current_clause += f' {join_str} ' + return current_clause + new_clause diff --git a/search-api/src/search_api/services/base_solr/utils/query_params.py b/search-api/src/search_api/services/base_solr/utils/query_params.py new file mode 100644 index 00000000..b2da769f --- /dev/null +++ b/search-api/src/search_api/services/base_solr/utils/query_params.py @@ -0,0 +1,34 @@ +# Copyright © 2023 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@dataclass
class QueryParams:  # pylint: disable=too-few-public-methods
    """Class definition of query params.

    Groups everything a search needs: the text/filters to search for, paging, facet
    selections and the per-field search configuration handed to the query builder.
    """

    # base doc query: the 'value' entry is the search text, other non-empty entries become filter clauses
    query: dict[str, str]
    # number of docs to return (passed to the solr query together with start)
    rows: int
    # paging offset passed to the solr query
    start: int
    # selected facet values per field, applied as base doc (non nested) filters
    categories: dict[BaseEnum, list[str]]
    # child doc field name -> text; every term becomes a nested filter clause
    child_query: dict[str, str]
    # selected facet values per field, applied as nested (child doc) filters
    child_categories: dict[BaseEnum, list[str]]
    # NOTE(review): not consumed by the search helpers visible alongside this class - confirm usage
    child_date_ranges: dict[BaseEnum, str]
    # stored fields to return, sent as the payload 'fields'
    fields: list[str]
    # fields to search mapped to their level ('child' marks a nested field)
    query_fields: dict[BaseEnum, str]
    # boost applied to a field's clause as '^<boost>'
    query_boost_fields: dict[BaseEnum, int]
    # per field fuzzy distances, keyed by 'short' and 'long'
    query_fuzzy_fields: dict[BaseEnum, dict[str, int]]
class BusinessSolr(Solr):
    """Solr wrapper configured for the business core (business docs with nested party docs)."""

    # identifier terms are split into prefix / number for both identifier fields
    query_builder = QueryBuilder(
        identifier_field_values=[BusinessField.IDENTIFIER.value, BusinessField.IDENTIFIER_Q.value],
        unique_parent_field=BusinessField.IDENTIFIER)

    # stored fields returned for a business search
    business_fields = [field.value for field in (
        BusinessField.BN, BusinessField.IDENTIFIER, BusinessField.NAME,
        BusinessField.STATE, BusinessField.TYPE, BusinessField.GOOD_STANDING,
        BusinessField.SCORE)]
    # stored fields returned for a business search that includes the nested parties
    business_with_parties_fields = [
        *(field.value for field in (
            BusinessField.BN, BusinessField.IDENTIFIER, BusinessField.NAME,
            BusinessField.STATE, BusinessField.TYPE, BusinessField.GOOD_STANDING,
            BusinessField.PARTIES)),
        '[child]',
        BusinessField.SCORE.value,
        *(field.value for field in (PartyField.PARTY_NAME, PartyField.PARTY_ROLE, PartyField.PARTY_TYPE))]
    # stored fields returned for a party search
    party_fields = [field.value for field in (
        PartyField.PARENT_BN, PartyField.PARENT_IDENTIFIER,
        PartyField.PARENT_NAME, PartyField.PARENT_STATE, PartyField.PARENT_TYPE,
        PartyField.PARTY_NAME, PartyField.PARTY_ROLE, PartyField.PARTY_TYPE)]

    def create_or_replace_docs(self,
                               docs: list[BusinessDoc] = None,
                               raw_docs: list[dict] = None,
                               timeout=25,
                               additive=True):
        """Post the given docs to the solr update handler.

        Args:
            docs: business docs to send; converted to dicts when no raw_docs are given.
            raw_docs: already formatted dicts; sent untouched and take precedence over docs.
            timeout: timeout handed to the solr call.
            additive: when False (and docs are used) the parties of each doc are wrapped in a
                'set' operation before sending.

        Returns:
            Whatever ``call_solr`` returns for the POST.
        """
        if raw_docs:
            update_list = raw_docs
        else:
            update_list = [asdict(doc) for doc in docs]
            if not additive:
                parties_key = BusinessField.PARTIES.value
                for business_dict in update_list:
                    parties = business_dict.get(parties_key, None)
                    if parties:
                        business_dict[parties_key] = {'set': parties}

        # large payloads go to the bulk update url
        url = self.bulk_update_url if len(update_list) >= 1000 else self.update_url
        return self.call_solr('POST', url, json_data=update_list, timeout=timeout)
+"""Manages the business solr doc fields.""" +from .business import BusinessField +from .party import PartyField diff --git a/search-api/src/search_api/services/business_solr/doc_fields/business.py b/search-api/src/search_api/services/business_solr/doc_fields/business.py new file mode 100644 index 00000000..bc5115a9 --- /dev/null +++ b/search-api/src/search_api/services/business_solr/doc_fields/business.py @@ -0,0 +1,53 @@ +# Copyright © 2024 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Copyright © 2023 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class BusinessField(BaseEnum):  # pylint: disable=too-few-public-methods
    """Enum of the business fields available.

    Each member's value is the field name as it is written in solr queries, filters and
    returned field lists.
    """

    # unique key for all docs
    UNIQUE_KEY = 'id'
    # stored fields
    BN = 'bn'
    IDENTIFIER = 'identifier'
    NAME = 'name'
    PARTIES = 'parties'
    STATE = 'status'
    TYPE = 'legalType'
    # query fields
    BN_Q = 'bn_q'
    IDENTIFIER_Q = 'identifier_q'
    NAME_Q = 'name_q'
    NAME_SINGLE = 'name_single_term'
    NAME_STEM_AGRO = 'name_stem_agro'
    NAME_SUGGEST = 'name_suggest'
    NAME_XTRA_Q = 'name_xtra_q'
    # NOTE: listed with the query fields, but it is also requested as a returned field by the business search
    GOOD_STANDING = 'goodStanding'
    # common built in across docs
    SCORE = 'score'
# See the License for the specific language governing permissions and # limitations under the License. -"""Manages solr fields for search solr.""" +"""Manages party doc fields.""" from search_api.utils.base import BaseEnum -class SolrField(BaseEnum): - """Enum of the fields available in the solr search core.""" +class PartyField(BaseEnum): # pylint: disable=too-few-public-methods + """Enum of the party fields available.""" - # base doc stored fields - BN = 'bn' - IDENTIFIER = 'identifier' - NAME = 'name' - PARTIES = 'parties' - SCORE = 'score' - STATE = 'status' - TYPE = 'legalType' - - # child parties doc stored fields + # unique key for all docs + UNIQUE_KEY = 'id' + # stored fields PARENT_BN = 'parentBN' PARENT_IDENTIFIER = 'parentIdentifier' PARENT_NAME = 'parentName' @@ -49,23 +42,13 @@ class SolrField(BaseEnum): PARTY_NAME = 'partyName' PARTY_ROLE = 'partyRoles' PARTY_TYPE = 'partyType' - - # business query fields - BN_Q = 'bn_q' - IDENTIFIER_Q = 'identifier_q' - NAME_Q = 'name_q' - NAME_SINGLE = 'name_single_term' - NAME_STEM_AGRO = 'name_stem_agro' - NAME_SUGGEST = 'name_suggest' - GOOD_STANDING = 'goodStanding' - - # party query fields + # query fields PARTY_NAME_Q = 'partyName_q' PARTY_NAME_SINGLE = 'partyName_single_term' PARTY_NAME_STEM_AGRO = 'partyName_stem_agro' - PARTY_NAME_SUGGEST = 'partyName_suggest' - PARENT_NAME_Q = 'parentName_q' + PARTY_NAME_XTRA_Q = 'partyName_xtra_q' PARENT_NAME_SINGLE = 'parentName_single_term' - PARENT_NAME_STEM_AGRO = 'parentName_stem_agro' PARENT_BN_Q = 'parentBN_q' PARENT_IDENTIFIER_Q = 'parentIdentifier_q' + # common built in across docs + SCORE = 'score' diff --git a/search-api/src/search_api/services/business_solr/doc_models/__init__.py b/search-api/src/search_api/services/business_solr/doc_models/__init__.py new file mode 100644 index 00000000..5a1bf0fd --- /dev/null +++ b/search-api/src/search_api/services/business_solr/doc_models/__init__.py @@ -0,0 +1,16 @@ +# Copyright © 2024 Province of British Columbia +# +# 
Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This module manages the business solr doc models.""" +from .business import BusinessDoc +from .party import PartyDoc diff --git a/search-api/src/search_api/services/solr/solr_docs.py b/search-api/src/search_api/services/business_solr/doc_models/business.py similarity index 60% rename from search-api/src/search_api/services/solr/solr_docs.py rename to search-api/src/search_api/services/business_solr/doc_models/business.py index 041c2b05..b2b9074c 100644 --- a/search-api/src/search_api/services/solr/solr_docs.py +++ b/search-api/src/search_api/services/business_solr/doc_models/business.py @@ -1,4 +1,4 @@ -# Copyright © 2022 Province of British Columbia +# Copyright © 2024 Province of British Columbia # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,37 +16,18 @@ from dataclasses import dataclass from typing import List, Optional - -@dataclass -class PartyDoc: - """Class representation for a solr business doc.""" - - parentLegalType: str - parentName: str - parentStatus: str - partyName: str - partyRoles: List[str] - partyType: str - parentBN: Optional[str] = None +from .party import PartyDoc @dataclass class BusinessDoc: """Class representation for a solr business doc.""" + id: str identifier: str legalType: str name: str status: str goodStanding: bool = None bn: Optional[str] = None - identifier_q: str = None parties: Optional[List[PartyDoc]] = None - - def __post_init__(self): - """Set identifier_q to the business level identifier. - - It isn't a copy field in solr to avoid including generated party identifiers - so it must be set explicitly during an update. - """ - self.identifier_q = self.identifier diff --git a/search-api/src/search_api/services/business_solr/doc_models/party.py b/search-api/src/search_api/services/business_solr/doc_models/party.py new file mode 100644 index 00000000..3f4ad02d --- /dev/null +++ b/search-api/src/search_api/services/business_solr/doc_models/party.py @@ -0,0 +1,32 @@ +# Copyright © 2024 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@dataclass
class PartyDoc:
    """Class representation for a solr party doc.

    Field names are camelCase because they are the solr field names (hence the module level
    invalid-name pylint disable).
    """

    # unique key of the doc
    id: str
    # parent* fields: details of the business this party is attached to
    parentLegalType: str
    parentIdentifier: str
    parentName: str
    parentStatus: str
    # party details
    partyName: str
    partyRoles: List[str]
    partyType: str
    # optional: defaults to None when not provided
    parentBN: Optional[str] = None
+"""This module manages util methods for the business solr service.""" +from .business_search import business_search +from .business_suggest import business_suggest +from .parties_search import parties_search diff --git a/search-api/src/search_api/services/business_solr/utils/add_category_filters.py b/search-api/src/search_api/services/business_solr/utils/add_category_filters.py new file mode 100644 index 00000000..c91924db --- /dev/null +++ b/search-api/src/search_api/services/business_solr/utils/add_category_filters.py @@ -0,0 +1,27 @@ +# Copyright © 2023 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Category filter methods.""" +from .. 
def add_category_filters(solr_payload: dict,
                         categories: dict[BusinessField, list[str]],
                         is_nested: bool,
                         solr: BusinessSolr):
    """Append one facet filter clause per selected category to the payload.

    Args:
        solr_payload: payload being built; its 'filter' list is extended in place.
        categories: facet field mapped to the selected values (empty selections are ignored).
        is_nested: True when the categories belong to nested (child) docs.
        solr: solr instance providing the query builder.
    """
    build_facet_query = solr.query_builder.build_facet_query
    for category, selected_values in categories.items():
        if not selected_values:
            # nothing selected for this facet
            continue
        solr_payload['filter'].append(build_facet_query(category, selected_values, is_nested))
def business_search(params: QueryParams, solr: BusinessSolr):
    """Search the business docs and return the solr response for the given params.

    Builds the base query from the search value, adds phrase boosts so results with the terms
    in order rank higher, then applies the base doc facet filters, the child (party) query and
    the child facet filters before running the query with the requested paging.
    """
    builder = solr.query_builder
    # base doc query (init query / filter)
    base_queries = builder.build_base_query(
        query=params.query,
        fields=params.query_fields,
        boost_fields=params.query_boost_fields,
        fuzzy_fields=params.query_fuzzy_fields)

    # boosts for term order result ordering
    search_value = params.query['value']
    name_q = BusinessField.NAME_Q.value
    name_stem = BusinessField.NAME_STEM_AGRO.value
    base_queries['query'] += f' OR ({name_q}:"{search_value}"~5^5)'
    base_queries['query'] += f' OR ({name_stem}:"{search_value}"~10^3)'
    base_queries['query'] += f' OR ({name_stem}:"{search_value.split()[0]}"^2)'

    # defaults; parentFilters is a snapshot of the filters before any facet / child filters are appended
    named_queries = {
        'parents': f'{BusinessField.IDENTIFIER.value}:*',
        'parentFilters': ' AND '.join(base_queries['filter'])}
    facets = {}
    for facet_field in (BusinessField.STATE, BusinessField.TYPE):
        facets.update(builder.build_facet(facet_field, False))
    solr_payload = {**base_queries, 'queries': named_queries, 'facet': facets, 'fields': params.fields}

    # base doc faceted filters
    add_category_filters(solr_payload=solr_payload,
                         categories=params.categories,
                         is_nested=False,
                         solr=solr)
    # child filter queries
    child_filter = builder.build_child_query(params.child_query)
    if child_filter:
        solr_payload['filter'].append(child_filter)
    # child doc faceted filter queries
    add_category_filters(solr_payload=solr_payload,
                         categories=params.child_categories,
                         is_nested=True,
                         solr=solr)

    return solr.query(solr_payload, params.start, params.rows)
def business_suggest(query: str, solr: BusinessSolr, rows=5) -> list:
    """Return the list of business suggestions from Solr from given text.

    Up to three solr calls are made, each one only when the previous ones did not already
    fill ``rows`` suggestions: the suggester, a name search on the single term name field,
    and an identifier / BN search.

    Args:
        query: text typed by the user.
        solr: business solr instance to query.
        rows: maximum number of suggestions to return.

    Returns:
        A list of ``{'type': <field name>, 'value': <suggestion>}`` dicts, names first, then
        identifiers, then BNs, truncated to ``rows``.
    """
    name_field = BusinessField.NAME.value
    identifier_field = BusinessField.IDENTIFIER.value
    bn_field = BusinessField.BN.value

    # 1st solr query (names)
    name_suggestions = solr.suggest(query, rows, True)
    # 2nd solr query (extra names)
    if len(name_suggestions) < rows:
        name_select_payload = solr.query_builder.build_base_query({'value': query},
                                                                  {BusinessField.NAME_SINGLE: 'parent'},
                                                                  {},
                                                                  {})
        name_select_payload['fields'] = solr.business_fields
        # NOTE(review): positional order (payload, start, rows) is taken from the other solr.query call sites;
        # this call used to pass rows in the start position
        name_docs = solr.query(name_select_payload, 0, rows).get('response', {}).get('docs', [])
        # remove dups while keeping the order solr returned the docs in
        seen_names = set(name_suggestions)
        extra_name_suggestions = []
        for doc in name_docs:
            name = doc.get(name_field)
            if not name:
                continue
            name = name.upper()
            if name not in seen_names:
                seen_names.add(name)
                extra_name_suggestions.append(name)
        name_suggestions = [*name_suggestions, *extra_name_suggestions]
    query = query.upper()  # NOTE: needed for bn/identifier processing too

    # 3rd solr query (bns + identifiers)
    identifier_suggestions = []
    bn_suggestions = []
    if len(name_suggestions) < rows:
        bn_id_payload = {
            'query': f'{BusinessField.IDENTIFIER_Q.value}:{query} OR {BusinessField.BN_Q.value}:{query}',
            'fields': solr.business_fields}
        bn_id_docs = solr.query(bn_id_payload, 0, rows).get('response', {}).get('docs', [])

        # docs missing the field are skipped instead of raising on "query in None"
        identifier_suggestions = [
            doc[identifier_field] for doc in bn_id_docs
            if doc.get(identifier_field) and query in doc[identifier_field]]
        bn_suggestions = [
            doc[bn_field] for doc in bn_id_docs
            if doc.get(bn_field) and query in doc[bn_field]]

    # format/combine response
    suggestions = [{'type': name_field, 'value': x} for x in name_suggestions]
    suggestions += [{'type': identifier_field, 'value': x} for x in identifier_suggestions]
    suggestions += [{'type': bn_field, 'value': x} for x in bn_suggestions]
    return suggestions[:rows]
def parties_search(params: QueryParams, solr: BusinessSolr):
    """Search the party docs and return the solr response for the given params.

    Builds the base query from the search value, adds phrase boosts so results with the terms
    in order rank higher, applies the facet filters and runs the query with the requested paging.
    """
    builder = solr.query_builder
    # base doc query (init query / filter)
    base_queries = builder.build_base_query(
        query=params.query,
        fields=params.query_fields,
        boost_fields=params.query_boost_fields,
        fuzzy_fields=params.query_fuzzy_fields)

    # boosts for term order result ordering
    search_value = params.query['value']
    party_name_q = PartyField.PARTY_NAME_Q.value
    party_name_stem = PartyField.PARTY_NAME_STEM_AGRO.value
    base_queries['query'] += f' OR ({party_name_q}:"{search_value}"~5^5)'
    base_queries['query'] += f' OR ({party_name_stem}:"{search_value}"~10^3)'
    base_queries['query'] += f' OR ({party_name_stem}:"{search_value.split()[0]}"^2)'

    # defaults; parentFilters is a snapshot of the filters before any facet filters are appended
    named_queries = {
        'parents': f'{party_name_q}:*',
        'parentFilters': ' AND '.join(base_queries['filter'])}
    facets = {}
    for facet_field in (PartyField.PARENT_TYPE, PartyField.PARENT_STATE, PartyField.PARTY_ROLE):
        facets.update(builder.build_facet(facet_field, False))
    solr_payload = {**base_queries, 'queries': named_queries, 'facet': facets, 'fields': params.fields}

    # base doc faceted filters
    add_category_filters(solr_payload=solr_payload,
                         categories=params.categories,
                         is_nested=False,
                         solr=solr)

    return solr.query(solr_payload, params.start, params.rows)
@@ -24,7 +24,6 @@ from flask import Flask import search_api -# from search_api.models import User from search_api.services.authz import get_role from search_api.utils.auth import JwtManager @@ -52,7 +51,6 @@ def init_app(self, app: Flask, td: TestData = None): # pylint: disable=invalid- """ self.app = app self.sdk_key = app.config.get('LD_SDK_KEY') - if td: client = LDClient(config=Config('testing', update_processor_class=td)) elif self.sdk_key: @@ -61,11 +59,11 @@ def init_app(self, app: Flask, td: TestData = None): # pylint: disable=invalid- # with suppress(Exception): try: - if client and client.is_initialized(): # pylint: disable=E0601,E0606 + if client and client.is_initialized(): # pylint: disable=possibly-used-before-assignment app.extensions[Flags.COMPONENT_NAME] = client app.teardown_appcontext(self.teardown) except Exception as err: # noqa: B902 - current_app.logger.warn('issue registering flag service', err) + app.logger.warn('issue registering flag service %s', err) def teardown(self, exception): # pylint: disable=unused-argument; flask method signature """Destroy all objects created by this extension. 
@@ -88,13 +86,6 @@ def get_client(): except Exception: # noqa: B902 return None - @staticmethod - def get_anonymous_user(): - """Return an anonymous key.""" - return { - 'key': 'anonymous' - } - @staticmethod def flag_user(user: search_api.models.User, account_id: int = None, @@ -118,17 +109,20 @@ def flag_user(user: search_api.models.User, return _user + @staticmethod + def get_anonymous_user(): + """Return an anonymous key.""" + return {'key': 'anonymous'} + @staticmethod def value(flag: str, user=None): """Retrieve the value of the (flag, user) tuple.""" - client = Flags.get_client() - - if user: - flag_user = user - else: - flag_user = Flags.get_anonymous_user() - try: + client = Flags.get_client() + if user: + flag_user = user + else: + flag_user = Flags.get_anonymous_user() return client.variation(flag, flag_user, None) except Exception as err: # noqa: B902 # pylint: disable=consider-using-f-string diff --git a/search-api/src/search_api/services/solr/solr.py b/search-api/src/search_api/services/solr/solr.py deleted file mode 100644 index 4c294a06..00000000 --- a/search-api/src/search_api/services/solr/solr.py +++ /dev/null @@ -1,304 +0,0 @@ -# Copyright © 2022 Province of British Columbia -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Manages solr class for using search solr.""" -import json -import re -from contextlib import suppress -from datetime import datetime, timedelta -from dataclasses import asdict -from http import HTTPStatus -from typing import Dict, List - -from requests import Response, Session -from requests.adapters import HTTPAdapter, Retry -from requests.exceptions import ConnectionError as SolrConnectionError -from flask import current_app - -from search_api.exceptions import SolrException - -from .solr_docs import BusinessDoc -from .solr_fields import SolrField - - -class Solr: - """Wrapper around the solr instance.""" - - def __init__(self, app=None): - """Initialize this object.""" - self.app = None - - self.solr_url = None - self.core = 'search' - self.default_start = 0 - self.default_rows = 10 - # facets - self.base_facets = json.dumps({ - SolrField.TYPE.value: {'type': 'terms', 'field': SolrField.TYPE.value}, - SolrField.STATE.value: {'type': 'terms', 'field': SolrField.STATE.value}}) - self.party_facets = json.dumps({ - SolrField.PARTY_ROLE.value: {'type': 'terms', 'field': SolrField.PARTY_ROLE.value}, - SolrField.PARENT_STATE.value: {'type': 'terms', 'field': SolrField.PARENT_STATE.value}, - SolrField.PARENT_TYPE.value: {'type': 'terms', 'field': SolrField.PARENT_TYPE.value}}) - # fields - self.base_fields = f'{SolrField.BN.value},{SolrField.IDENTIFIER.value},{SolrField.NAME.value},' + \ - f'{SolrField.STATE.value},{SolrField.TYPE.value},{SolrField.SCORE.value},{SolrField.GOOD_STANDING.value}' - self.nest_fields_party = f'{SolrField.PARTIES.value},{SolrField.PARTY_NAME.value},' + \ - f'{SolrField.PARTY_ROLE.value},{SolrField.PARTY_TYPE.value},[child childFilter=' + '{filter}]' - self.party_fields = f'{SolrField.PARENT_BN.value},{SolrField.PARENT_IDENTIFIER.value},' + \ - f'{SolrField.PARENT_NAME.value},{SolrField.PARENT_STATE.value},{SolrField.PARENT_TYPE.value},' + \ - f'{SolrField.PARTY_NAME.value},{SolrField.PARTY_ROLE.value},{SolrField.PARTY_TYPE.value}' - # 
query urls - self.search_query = '{url}/{core}/query' - self.suggest_query = '{url}/{core}/suggest' - self.update_query = '{url}/{core}/update?commitWithin=1000&overwrite=true&wt=json' - - if app: - self.init_app(app) - - def init_app(self, app): - """Initialize the Solr environment.""" - self.app = app - self.solr_url = app.config.get('SOLR_SVC_URL') - - # pylint: disable=too-many-arguments - def call_solr(self, - method: str, - query: str, - params: dict = None, - json_data: dict = None, - xml_data: str = None, - force=False) -> Response: - """Call solr instance with given params.""" - try: - if self.is_reindexing() and not force: - err_msg = 'This resource is undergoing scheduled maintenance and will be ' \ - f'unavailable for up to {self.app.config.get("SOLR_REINDEX_LENGTH")} minutes.' - raise SolrException(err_msg, HTTPStatus.SERVICE_UNAVAILABLE) - - response = None - url = query.format(url=self.solr_url, core=self.core) - retry_times = 3 if method == 'GET' else 5 - backoff_factor = 1 if method == 'GET' else 2 - retries = Retry(total=retry_times, - backoff_factor=backoff_factor, - status_forcelist=[500, 502, 503, 504], - allowed_methods=['GET', 'POST']) - session = Session() - session.mount(url, HTTPAdapter(max_retries=retries)) - if method == 'GET': - response = session.get(url, params=params, timeout=30) - elif method == 'POST' and json_data: - response = session.post(url=url, json=json_data, timeout=60) - elif method == 'POST' and xml_data: - headers = {'Content-Type': 'application/xml'} - response = session.post(url=url, data=xml_data, headers=headers, timeout=60) - else: - raise Exception('Invalid params given.') # pylint: disable=broad-exception-raised - # check for error - if response.status_code != HTTPStatus.OK: - error = response.json().get('error', {}).get('msg', 'Error handling Solr request.') - self.app.logger.error(f'{error}, {response.status_code}') - raise SolrException( - error=error, - status_code=response.status_code) - return response - 
except SolrException as err: - # pass along - raise err - except SolrConnectionError as err: - current_app.logger.error(err.with_traceback(None)) - raise SolrException( - error='Read timeout error while handling Solr request.', - status_code=HTTPStatus.GATEWAY_TIMEOUT) from err - except Exception as err: # noqa B902 - current_app.logger.error(err.with_traceback(None)) - msg = 'Error handling Solr request.' - status_code = HTTPStatus.INTERNAL_SERVER_ERROR - with suppress(Exception): - status_code = response.status_code - msg = response.json().get('error', {}).get('msg', 'Error handling Solr request.') - raise SolrException( - error=msg, - status_code=status_code) from err - - def create_or_replace_docs(self, docs: List[BusinessDoc], force=False): - """Create or replace solr docs in the core.""" - update_json = [asdict(doc) for doc in docs] - response = self.call_solr('POST', self.update_query, json_data=update_json, force=force) - return response - - def delete_all_docs(self): - """Delete all solr docs from the core.""" - payload = '*:*' - response = self.call_solr('POST', self.update_query, xml_data=payload, force=True) - return response - - def delete_docs(self, identifiers: List[str]): - """Delete solr docs from the core.""" - payload = '' - if identifiers: - payload += f'{SolrField.IDENTIFIER.value}:{identifiers[0].upper()}' - for identifier in identifiers[1:]: - payload += f' OR {SolrField.IDENTIFIER.value}:{identifier.upper()}' - payload += '' - - response = self.call_solr('POST', self.update_query, xml_data=payload) - return response - - def is_reindexing(self) -> bool: - """Return True if this instance of solr is in the process of reindexing.""" - current_weekday = datetime.utcnow().weekday() - timeout_start_weekday = self.app.config.get('SOLR_REINDEX_WEEKDAY') - current_day = datetime.utcnow().strftime('%d') - timeout_start_day = self.app.config.get('SOLR_REINDEX_DAY') - if current_weekday == timeout_start_weekday or current_day == timeout_start_day: - 
current_time = datetime.time(datetime.utcnow()) - timeout_start_time = datetime.strptime(self.app.config.get('SOLR_REINDEX_START_TIME'), '%H:%M:%S%z') - timeout_length = self.app.config.get('SOLR_REINDEX_LENGTH') # in minutes - timeout_end_time = timeout_start_time + timedelta(minutes=timeout_length) - if timeout_start_time.time() < current_time < timeout_end_time.time(): - return True - return False - - def query(self, params: str, start: int = None, rows: int = None) -> List: - """Return a list of solr docs from the solr query handler for the given params.""" - params['start'] = start if start else self.default_start - params['rows'] = rows if rows else self.default_rows - - response = self.call_solr('GET', self.search_query, params=params) - return response.json() - - def suggest(self, query: str, rows: int, build: bool = False) -> List[str]: - """Return a list of suggestions from the solr suggest handler for the given query.""" - suggest_params = { - 'suggest.q': query, - 'suggest.count': rows if rows else self.default_rows, - 'suggest.build': str(build).lower() - } - # call solr - response = self.call_solr('GET', self.suggest_query, suggest_params) - # parse response - suggestions = response.json() \ - .get('suggest', {}).get('name', {}).get(query, {}).get('suggestions', []) - return [x.get('term', '').upper() for x in suggestions] # i.e. returning list = ['COMPANY 1', 'COMPANY 2', ...] 
- - @staticmethod - def build_filter_query(field: SolrField, values: List[str]): - """Return the solr filter clause for the given params.""" - filter_q = f'{field}:("{values[0]}"' - for val in values[1:]: - filter_q += f' OR "{val}"' - return filter_q + ')' - - @staticmethod - def build_split_query(query: Dict[str, str], fields: List[SolrField], wild_card_fields: List[SolrField]) -> Dict: - """Return a solr query with fqs for each subsequent term.""" - def add_identifier(field: SolrField, term: str): - """Return a special identifier query.""" - corp_prefix_regex = r'(^[aA-zZ]+)[0-9]+$' - if identifier := re.search(corp_prefix_regex, term): - prefix = identifier.group(1) - new_term = term.replace(prefix, '', 1) - return f'({field}:"{new_term}" AND {field}:"{prefix.upper()}")' - return f'{field}:{term}' - - def add_to_q(q: str, fields: List[SolrField], term: str): # pylint: disable=invalid-name - """Return an updated solr q param with extra clauses.""" - identifier_fields = [SolrField.IDENTIFIER_Q.value, SolrField.PARENT_IDENTIFIER_Q.value] - - first_clause = f'({fields[0]}:{term}' - if fields[0] in identifier_fields: - first_clause = f'({add_identifier(fields[0], term)}' - - if not q: - q = f'{first_clause}' - else: - q += f' AND {first_clause}' - if fields[0] in wild_card_fields and fields[0] not in identifier_fields: - q += '*' - for field in fields[1:]: - new_clause = f'{field}:{term}' - if field in identifier_fields: - new_clause = f'{add_identifier(field, term)}' - q += f' OR {new_clause}' - if field in wild_card_fields and field not in identifier_fields: - q += '*' - return q + ')' - - def add_to_fq(fq: str, fields: List[SolrField], terms: str): # pylint: disable=invalid-name - """Return an updated solr fq param with extra clauses.""" - for term in terms: - if fq: - fq += f' AND ({fields[0]}:{term}' - else: - fq = f'({fields[0]}:{term}' - if fields[0] in wild_card_fields: - fq += '*' - for field in fields[1:]: - fq += f' OR {field}:{term}' - if field in 
wild_card_fields: - fq += '*' - fq += ')' - return fq - - terms = query['value'].split() - # add initial q param - params = {'q': add_to_q('', fields, terms[0])} - # add initial filter param for subsequent terms - params['fq'] = add_to_fq('', fields, terms[1:]) - - # add query clause and subsequent filter clauses for extra query items - for key in query: - if key == 'value' or not query[key]: - continue - extra_terms = query[key].split() - # add query clause for 1st term in query[key] - params['q'] = add_to_q(params['q'], [key], extra_terms[0]) - # add filter clause for subsequent terms in query[key] - params['fq'] = add_to_fq(params['fq'], [key], extra_terms[1:]) - - return params - - @staticmethod - def highlight_names(query: str, names: List[str]) -> List[str]: - """Highlight terms within names.""" - highlighted_names = [] - # TODO: add stuff in here to catch special chars / stems etc. - for name in names: - name = name.replace(query, f'{query}') - highlighted_names.append(name) - return highlighted_names - - @staticmethod - def parse_facets(facet_data: Dict) -> Dict: - """Return formatted solr facet response data.""" - facet_info = facet_data.get('facets', {}) - facets = {} - for category in facet_info: - if category == 'count': - continue - facets[category] = [] - for item in facet_info[category]['buckets']: - facets[category].append({'value': item['val'], 'count': item['count']}) - - return {'fields': facets} - - @staticmethod - def prep_query_str(query: str) -> str: - """Return query string prepped for solr call.""" - # replace solr specific special chars - rmv_spec_chars_rgx = r'([\[\]!()\"~*?:/\\={}^%`#|<>,@$;_\-])' - handled_spec_chars_rgx = r'([&+]+)' - query = re.sub(rmv_spec_chars_rgx, ' ', query.lower()) - return re.sub(handled_spec_chars_rgx, r' \\\1 ', query) if not query.isspace() else r'\*' diff --git a/search-api/src/search_api/version.py b/search-api/src/search_api/version.py index 2688bf7b..05762dbc 100644 --- 
a/search-api/src/search_api/version.py +++ b/search-api/src/search_api/version.py @@ -22,4 +22,4 @@ Development release segment: .devN """ -__version__ = '1.0.11' # pylint: disable=invalid-name +__version__ = '2.0.0a1' # pylint: disable=invalid-name diff --git a/search-api/tests/postman/search-api.postman_collection.json b/search-api/tests/postman/search-api.postman_collection.json index 429c0e87..5ae2d27b 100644 --- a/search-api/tests/postman/search-api.postman_collection.json +++ b/search-api/tests/postman/search-api.postman_collection.json @@ -95,8 +95,8 @@ "header": [ { "key": "Accept", - "type": "text", - "value": "application/json" + "value": "application/json", + "type": "text" }, { "key": "Account-Id", @@ -105,9 +105,9 @@ } ], "url": { - "raw": "{{base_url}}/{{version}}/ops/readyz", + "raw": "{{internal_url}}/{{version}}/ops/readyz", "host": [ - "{{base_url}}" + "{{internal_url}}" ], "path": [ "{{version}}", @@ -125,8 +125,8 @@ "header": [ { "key": "Accept", - "type": "text", - "value": "application/json" + "value": "application/json", + "type": "text" } ], "url": { @@ -186,8 +186,8 @@ "header": [ { "key": "Accept", - "type": "text", - "value": "application/json" + "value": "application/json", + "type": "text" } ], "url": { @@ -211,8 +211,8 @@ "header": [ { "key": "Accept", - "type": "text", - "value": "application/json" + "value": "application/json", + "type": "text" } ], "url": { @@ -277,14 +277,14 @@ "header": [ { "key": "Accept", - "type": "text", - "value": "application/json" + "value": "application/json", + "type": "text" } ], "url": { - "raw": "{{base_url}}/{{version}}/meta/info", + "raw": "{{internal_url}}/{{version}}/meta/info", "host": [ - "{{base_url}}" + "{{internal_url}}" ], "path": [ "{{version}}", @@ -302,8 +302,8 @@ "header": [ { "key": "Accept", - "type": "text", - "value": "application/json" + "value": "application/json", + "type": "text" } ], "url": { @@ -368,8 +368,8 @@ "header": [ { "key": "Accept", - "type": "text", - "value": 
"application/json" + "value": "application/json", + "type": "text" }, { "key": "Account-Id", @@ -378,7 +378,7 @@ } ], "url": { - "raw": "{{base_url}}/{{version}}/businesses/search/suggest?query=001&rows=20&highlight=true", + "raw": "{{base_url}}/{{version}}/businesses/search/suggest?query=113456&rows=20&highlight=true", "host": [ "{{base_url}}" ], @@ -391,7 +391,7 @@ "query": [ { "key": "query", - "value": "001" + "value": "113456" }, { "key": "rows", @@ -413,8 +413,8 @@ "header": [ { "key": "Accept", - "type": "text", - "value": "application/json" + "value": "application/json", + "type": "text" }, { "key": "Account-Id", @@ -423,7 +423,7 @@ } ], "url": { - "raw": "{{base_url}}/{{version}}/businesses/search/suggest?query=001&rows=20&highlight=true", + "raw": "{{base_url}}/{{version}}/businesses/search/suggest?query=113456&rows=20&highlight=true", "host": [ "{{base_url}}" ], @@ -436,7 +436,7 @@ "query": [ { "key": "query", - "value": "001" + "value": "113456" }, { "key": "rows", @@ -455,11 +455,11 @@ "header": [ { "key": "Server", - "value": "Werkzeug/2.1.2 Python/3.8.2" + "value": "Werkzeug/3.0.3 Python/3.11.4" }, { "key": "Date", - "value": "Fri, 08 Jul 2022 00:08:42 GMT" + "value": "Tue, 23 Jul 2024 22:37:14 GMT" }, { "key": "Content-Type", @@ -467,7 +467,7 @@ }, { "key": "Content-Length", - "value": "1163" + "value": "185" }, { "key": "Access-Control-Allow-Origin", @@ -475,11 +475,7 @@ }, { "key": "API", - "value": "search_api/1.0.0a2.dev1" - }, - { - "key": "SCHEMAS", - "value": "registry_schemas/1.1.0" + "value": "search_api/2.0.0a1" }, { "key": "Connection", @@ -487,7 +483,7 @@ } ], "cookie": [], - "body": "{\n \"queryInfo\": {\n \"highlight\": true,\n \"query\": \"001\",\n \"rows\": 20\n },\n \"results\": [\n {\n \"type\": \"name\",\n \"value\": \"0018505 B.C. LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"0018297 B.C. LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"0018224 B.C. LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"0017127 B.C. 
LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"0016739 B.C. LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"0016429 B.C. LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"0014702 B.C. LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"0013983 B.C. LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"0013968 B.C. LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"0013056 B.C. LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"0013 INVESTMENTS LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"0012480 B.C. LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"0012 HOLDINGS LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"00118 ENTERPRISE LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"0011303 B.C. UNLIMITED LIABILITY COMPANY\"\n },\n {\n \"type\": \"name\",\n \"value\": \"0010914 B.C. LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"0010862 B.C. UNLIMITED LIABILITY COMPANY\"\n },\n {\n \"type\": \"name\",\n \"value\": \"001 INK TONER CARTRIDGE LTD.\"\n },\n {\n \"type\": \"name\",\n \"value\": \"001 ENTERPRISES INC.\"\n },\n {\n \"type\": \"name\",\n \"value\": \".001 PAINTING CO. LTD.\"\n }\n ]\n}" + "body": "{\n \"queryInfo\": {\n \"highlight\": false,\n \"query\": \"113456\",\n \"rows\": 5\n },\n \"results\": [\n {\n \"type\": \"bn\",\n \"value\": \"113456789BC0001\"\n }\n ],\n \"warnings\": [\n \"This call is depreciated. 
Please use \\\"/facets\\\" instead.\"\n ]\n}" } ] }, @@ -498,14 +494,14 @@ "header": [ { "key": "Accept", - "type": "text", - "value": "application/json" + "value": "application/json", + "type": "text" } ], "url": { - "raw": "{{base_url}}/{{version}}/businesses/search/facets?query=value:BC123333&categories=status:ACTIVE&start=0&rows=15", + "raw": "{{internal_url}}/{{version}}/businesses/search/facets?query=value:test&categories=status:ACTIVE&start=0&rows=15&parties=true", "host": [ - "{{base_url}}" + "{{internal_url}}" ], "path": [ "{{version}}", @@ -516,7 +512,7 @@ "query": [ { "key": "query", - "value": "value:BC123333" + "value": "value:test" }, { "key": "categories", @@ -529,6 +525,10 @@ { "key": "rows", "value": "15" + }, + { + "key": "parties", + "value": "true" } ] }, @@ -542,14 +542,14 @@ "header": [ { "key": "Accept", - "type": "text", - "value": "application/json" + "value": "application/json", + "type": "text" } ], "url": { - "raw": "{{base_url}}/{{version}}/businesses/search/facets?query=value:test coop::identifier:349&categories=status:ACTIVE&start=0&rows=15", + "raw": "{{internal_url}}/{{version}}/businesses/search/facets?query=value:test&categories=status:ACTIVE&start=0&rows=15&parties=true", "host": [ - "{{base_url}}" + "{{internal_url}}" ], "path": [ "{{version}}", @@ -560,7 +560,7 @@ "query": [ { "key": "query", - "value": "value:test coop::identifier:349" + "value": "value:test" }, { "key": "categories", @@ -573,6 +573,10 @@ { "key": "rows", "value": "15" + }, + { + "key": "parties", + "value": "true" } ] } @@ -583,11 +587,11 @@ "header": [ { "key": "Server", - "value": "Werkzeug/2.1.2 Python/3.8.2" + "value": "Werkzeug/3.0.3 Python/3.11.4" }, { "key": "Date", - "value": "Wed, 27 Jul 2022 00:52:19 GMT" + "value": "Tue, 23 Jul 2024 22:35:32 GMT" }, { "key": "Content-Type", @@ -595,7 +599,7 @@ }, { "key": "Content-Length", - "value": "1205" + "value": "1833" }, { "key": "Access-Control-Allow-Origin", @@ -603,11 +607,7 @@ }, { "key": "API", - 
"value": "search_api/1.0.0b1.dev1" - }, - { - "key": "SCHEMAS", - "value": "registry_schemas/1.1.0" + "value": "search_api/2.0.0a1" }, { "key": "Connection", @@ -615,7 +615,7 @@ } ], "cookie": [], - "body": "{\n \"facets\": {\n \"fields\": {\n \"legalType\": [\n {\n \"count\": 8,\n \"value\": \"CP\"\n }\n ],\n \"status\": [\n {\n \"count\": 8,\n \"value\": \"ACTIVE\"\n }\n ]\n }\n },\n \"searchResults\": {\n \"queryInfo\": {\n \"categories\": {\n \"legalType\": \"\",\n \"status\": [\n \"ACTIVE\"\n ]\n },\n \"query\": {\n \"bn\": \"\",\n \"identifier\": \"349\",\n \"name\": \"\",\n \"value\": \"test coop\"\n },\n \"rows\": 15,\n \"start\": 0\n },\n \"results\": [\n {\n \"identifier\": \"CP3490234\",\n \"legalType\": \"CP\",\n \"name\": \"TEST COOP ANDRE\",\n \"score\": 426.3002,\n \"status\": \"ACTIVE\"\n },\n {\n \"identifier\": \"CP3490165\",\n \"legalType\": \"CP\",\n \"name\": \"TEST COOP IA\",\n \"score\": 426.3002,\n \"status\": \"ACTIVE\"\n },\n {\n \"identifier\": \"CP3490226\",\n \"legalType\": \"CP\",\n \"name\": \"TEST CONSENT COOP\",\n \"score\": 291.68088,\n \"status\": \"ACTIVE\"\n },\n {\n \"identifier\": \"CP3490282\",\n \"legalType\": \"CP\",\n \"name\": \"TEST REMOVE NAME TRANSLATION CO-OP\",\n \"score\": 139.77156,\n \"status\": \"ACTIVE\"\n },\n {\n \"identifier\": \"CP3490283\",\n \"legalType\": \"CP\",\n \"name\": \"TEST AGAIN FOR TRANSLATION REMOVE FROM COOP\",\n \"score\": 86.109634,\n \"status\": \"ACTIVE\"\n },\n {\n \"identifier\": \"CP3490286\",\n \"legalType\": \"CP\",\n \"name\": \"EVE TEST COOP 9270 COOP\",\n \"score\": 8.061411,\n \"status\": \"ACTIVE\"\n },\n {\n \"identifier\": \"CP3490287\",\n \"legalType\": \"CP\",\n \"name\": \"EVE TEST 9270 2 COOP\",\n \"score\": 8.061411,\n \"status\": \"ACTIVE\"\n },\n {\n \"identifier\": \"CP3490288\",\n \"legalType\": \"CP\",\n \"name\": \"EVE TEST COOP 9270 3 COOP\",\n \"score\": 7.806468,\n \"status\": \"ACTIVE\"\n }\n ],\n \"totalResults\": 8\n }\n}" + "body": "{\n \"facets\": {\n 
\"fields\": {\n \"legalType\": [\n {\n \"count\": 2,\n \"value\": \"CP\"\n },\n {\n \"count\": 2,\n \"value\": \"GP\"\n },\n {\n \"count\": 1,\n \"value\": \"BEN\"\n }\n ],\n \"status\": [\n {\n \"count\": 5,\n \"value\": \"ACTIVE\"\n }\n ]\n }\n },\n \"searchResults\": {\n \"queryInfo\": {\n \"categories\": {\n \"legalType\": \"\",\n \"status\": [\n \"ACTIVE\"\n ]\n },\n \"query\": {\n \"bn\": \"\",\n \"identifier\": \"\",\n \"name\": \"\",\n \"value\": \"test\"\n },\n \"rows\": 15,\n \"start\": 0\n },\n \"results\": [\n {\n \"bn\": \"123456788BC0001\",\n \"identifier\": \"FM1234568\",\n \"legalType\": \"GP\",\n \"name\": \"Test Partnership\",\n \"parties\": [\n {\n \"partyName\": \"Kyle Smile\",\n \"partyRoles\": [\n \"partner\"\n ],\n \"partyType\": \"person\",\n \"score\": 0\n },\n {\n \"partyName\": \"David Mortar\",\n \"partyRoles\": [\n \"partner\"\n ],\n \"partyType\": \"person\",\n \"score\": 0\n }\n ],\n \"score\": 1.861066,\n \"status\": \"ACTIVE\"\n },\n {\n \"bn\": \"113456789BC0001\",\n \"goodStanding\": true,\n \"identifier\": \"CP1233338\",\n \"legalType\": \"CP\",\n \"name\": \"Kyle stuffing tests\",\n \"score\": 1.6912264,\n \"status\": \"ACTIVE\"\n },\n {\n \"bn\": \"123456789BC0001\",\n \"goodStanding\": true,\n \"identifier\": \"BC1234567\",\n \"legalType\": \"BEN\",\n \"name\": \"Test Benefit Company\",\n \"parties\": [\n {\n \"partyName\": \"Joe Solver\",\n \"partyRoles\": [\n \"significant individual\"\n ],\n \"partyType\": \"person\",\n \"score\": 0\n },\n {\n \"partyName\": \"Willow Walaby\",\n \"partyRoles\": [\n \"significant individual\"\n ],\n \"partyType\": \"person\",\n \"score\": 0\n },\n {\n \"partyName\": \"Ryan Wills\",\n \"partyRoles\": [\n \"significant individual\"\n ],\n \"partyType\": \"person\",\n \"score\": 0\n },\n {\n \"partyName\": \"Filips Wills\",\n \"partyRoles\": [\n \"significant individual\"\n ],\n \"partyType\": \"person\",\n \"score\": 0\n }\n ],\n \"score\": 1.6427289,\n \"status\": \"ACTIVE\"\n },\n {\n 
\"bn\": \"123456789BC0001\",\n \"identifier\": \"FM1234567\",\n \"legalType\": \"GP\",\n \"name\": \"TEST ABCD, Bobby W Walaby\",\n \"parties\": [\n {\n \"partyName\": \"TEST ABCD\",\n \"partyRoles\": [\n \"partner\"\n ],\n \"partyType\": \"organization\",\n \"score\": 0\n },\n {\n \"partyName\": \"Bobby W Walaby\",\n \"partyRoles\": [\n \"partner\"\n ],\n \"partyType\": \"person\",\n \"score\": 0\n }\n ],\n \"score\": 1.3445685,\n \"status\": \"ACTIVE\"\n },\n {\n \"goodStanding\": false,\n \"identifier\": \"CP987654321\",\n \"legalType\": \"CP\",\n \"name\": \"Tested Coop\",\n \"score\": 0.69882464,\n \"status\": \"ACTIVE\"\n }\n ],\n \"totalResults\": 5\n }\n}" } ] }, @@ -626,12 +626,12 @@ "header": [ { "key": "Accept", - "type": "text", - "value": "application/json" + "value": "application/json", + "type": "text" } ], "url": { - "raw": "{{base_url}}/{{version}}/businesses/search/facets?query=value:test::bn:1::identifier:62::name:lab&start=0&rows=15&categories=legalType:sp,CP,bc,ben::status:ACTIVE,historical", + "raw": "{{base_url}}/{{version}}/businesses/search/facets?query=value:t::bn:1::identifier:2::name:l&start=0&rows=15&categories=legalType:sp,CP,bc,ben::status:ACTIVE,historical", "host": [ "{{base_url}}" ], @@ -644,7 +644,7 @@ "query": [ { "key": "query", - "value": "value:test::bn:1::identifier:62::name:lab" + "value": "value:t::bn:1::identifier:2::name:l" }, { "key": "start", @@ -662,90 +662,7 @@ }, "description": "Get drafts example." 
}, - "response": [ - { - "name": "facets refined", - "originalRequest": { - "method": "GET", - "header": [ - { - "key": "Accept", - "type": "text", - "value": "application/json" - } - ], - "url": { - "raw": "{{base_url}}/{{version}}/businesses/search/facets?query=value:test::bn:9::identifier:62::name:te equip stor&start=0&rows=15&categories=legalType:sp,CP,bc,ben::status:ACTIVE,historical", - "host": [ - "{{base_url}}" - ], - "path": [ - "{{version}}", - "businesses", - "search", - "facets" - ], - "query": [ - { - "key": "query", - "value": "value:test::bn:9::identifier:62::name:te equip stor" - }, - { - "key": "start", - "value": "0" - }, - { - "key": "rows", - "value": "15" - }, - { - "key": "categories", - "value": "legalType:sp,CP,bc,ben::status:ACTIVE,historical" - } - ] - } - }, - "status": "OK", - "code": 200, - "_postman_previewlanguage": "json", - "header": [ - { - "key": "Server", - "value": "Werkzeug/2.1.2 Python/3.8.2" - }, - { - "key": "Date", - "value": "Wed, 27 Jul 2022 00:45:41 GMT" - }, - { - "key": "Content-Type", - "value": "application/json" - }, - { - "key": "Content-Length", - "value": "501" - }, - { - "key": "Access-Control-Allow-Origin", - "value": "*" - }, - { - "key": "API", - "value": "search_api/1.0.0b1.dev1" - }, - { - "key": "SCHEMAS", - "value": "registry_schemas/1.1.0" - }, - { - "key": "Connection", - "value": "close" - } - ], - "cookie": [], - "body": "{\n \"facets\": {\n \"fields\": {\n \"legalType\": [\n {\n \"count\": 1,\n \"value\": \"SP\"\n }\n ],\n \"status\": [\n {\n \"count\": 1,\n \"value\": \"ACTIVE\"\n }\n ]\n }\n },\n \"searchResults\": {\n \"queryInfo\": {\n \"categories\": {\n \"legalType\": [\n \"sp\",\n \"CP\",\n \"bc\",\n \"ben\"\n ],\n \"status\": [\n \"ACTIVE\",\n \"historical\"\n ]\n },\n \"query\": {\n \"bn_q\": \"9\",\n \"identifier_q\": \"62\",\n \"name_q\": \"te equip stor\",\n \"value\": \"test\"\n },\n \"rows\": 15,\n \"start\": 0\n },\n \"results\": [\n {\n \"bn\": \"824492631BC0001\",\n \"identifier\": 
\"FM0623527\",\n \"legalType\": \"SP\",\n \"name\": \"TESLA TEST EQUIPMENT STORE - IMPORT_TEST\",\n \"score\": 10.710747,\n \"status\": \"ACTIVE\"\n }\n ],\n \"totalResults\": 1\n }\n}" - } - ] + "response": [] }, { "name": "facets special operator", @@ -754,12 +671,12 @@ "header": [ { "key": "Accept", - "type": "text", - "value": "application/json" + "value": "application/json", + "type": "text" } ], "url": { - "raw": "{{base_url}}/{{version}}/businesses/search/facets?query=value:test AND coop::identifier:349&categories=status:ACTIVE&start=0&rows=15", + "raw": "{{base_url}}/{{version}}/businesses/search/facets?query=value:test AND coop::identifier:3&categories=status:ACTIVE&start=0&rows=15", "host": [ "{{base_url}}" ], @@ -772,7 +689,7 @@ "query": [ { "key": "query", - "value": "value:test AND coop::identifier:349" + "value": "value:test AND coop::identifier:3" }, { "key": "categories", @@ -790,90 +707,7 @@ }, "description": "Get drafts example." }, - "response": [ - { - "name": "facets special operator", - "originalRequest": { - "method": "GET", - "header": [ - { - "key": "Accept", - "type": "text", - "value": "application/json" - } - ], - "url": { - "raw": "{{base_url}}/{{version}}/businesses/search/facets?query=value:test AND coop::identifier:349&categories=status:ACTIVE&start=0&rows=15", - "host": [ - "{{base_url}}" - ], - "path": [ - "{{version}}", - "businesses", - "search", - "facets" - ], - "query": [ - { - "key": "query", - "value": "value:test AND coop::identifier:349" - }, - { - "key": "categories", - "value": "status:ACTIVE" - }, - { - "key": "start", - "value": "0" - }, - { - "key": "rows", - "value": "15" - } - ] - } - }, - "status": "OK", - "code": 200, - "_postman_previewlanguage": "json", - "header": [ - { - "key": "Server", - "value": "Werkzeug/2.1.2 Python/3.8.2" - }, - { - "key": "Date", - "value": "Tue, 02 Aug 2022 22:18:39 GMT" - }, - { - "key": "Content-Type", - "value": "application/json" - }, - { - "key": "Content-Length", - "value": "408" 
- }, - { - "key": "Access-Control-Allow-Origin", - "value": "*" - }, - { - "key": "API", - "value": "search_api/1.0.0b1.dev3" - }, - { - "key": "SCHEMAS", - "value": "registry_schemas/1.1.0" - }, - { - "key": "Connection", - "value": "close" - } - ], - "cookie": [], - "body": "{\n \"facets\": {\n \"fields\": {\n \"legalType\": [\n {\n \"count\": 1,\n \"value\": \"CP\"\n }\n ],\n \"status\": [\n {\n \"count\": 1,\n \"value\": \"ACTIVE\"\n }\n ]\n }\n },\n \"searchResults\": {\n \"queryInfo\": {\n \"categories\": {\n \"legalType\": \"\",\n \"status\": [\n \"ACTIVE\"\n ]\n },\n \"query\": {\n \"bn\": \"\",\n \"identifier\": \"349\",\n \"name\": \"\",\n \"value\": \"test and coop\"\n },\n \"rows\": 15,\n \"start\": 0\n },\n \"results\": [\n {\n \"identifier\": \"CP3490234\",\n \"legalType\": \"CP\",\n \"name\": \"TEST COOP ANDRE\",\n \"score\": 8.75138,\n \"status\": \"ACTIVE\"\n }\n ],\n \"totalResults\": 1\n }\n}" - } - ] + "response": [] }, { "name": "parties", @@ -882,12 +716,12 @@ "header": [ { "key": "Accept", - "type": "text", - "value": "application/json" + "value": "application/json", + "type": "text" } ], "url": { - "raw": "{{base_url}}/{{version}}/businesses/search/parties?query=value:test p&categories=partyRoles:partner,proprietor&rows=100", + "raw": "{{base_url}}/{{version}}/businesses/search/parties?query=value:test&categories=partyRoles:partner,proprietor&rows=100", "host": [ "{{base_url}}" ], @@ -900,7 +734,7 @@ "query": [ { "key": "query", - "value": "value:test p" + "value": "value:test" }, { "key": "categories", @@ -922,12 +756,12 @@ "header": [ { "key": "Accept", - "type": "text", - "value": "application/json" + "value": "application/json", + "type": "text" } ], "url": { - "raw": "{{base_url}}/{{version}}/businesses/search/parties?query=value:test p&categories=partyRoles:partner,proprietor&rows=100", + "raw": "{{base_url}}/{{version}}/businesses/search/parties?query=value:test&categories=partyRoles:partner,proprietor&rows=100", "host": [ 
"{{base_url}}" ], @@ -940,7 +774,7 @@ "query": [ { "key": "query", - "value": "value:test p" + "value": "value:test" }, { "key": "categories", @@ -959,11 +793,11 @@ "header": [ { "key": "Server", - "value": "Werkzeug/2.1.2 Python/3.8.2" + "value": "Werkzeug/3.0.3 Python/3.11.4" }, { "key": "Date", - "value": "Wed, 27 Jul 2022 23:28:24 GMT" + "value": "Tue, 23 Jul 2024 22:38:41 GMT" }, { "key": "Content-Type", @@ -971,7 +805,7 @@ }, { "key": "Content-Length", - "value": "1108" + "value": "651" }, { "key": "Access-Control-Allow-Origin", @@ -979,11 +813,7 @@ }, { "key": "API", - "value": "search_api/1.0.0b1.dev1" - }, - { - "key": "SCHEMAS", - "value": "registry_schemas/1.1.0" + "value": "search_api/2.0.0a1" }, { "key": "Connection", @@ -991,7 +821,7 @@ } ], "cookie": [], - "body": "{\n \"facets\": {\n \"fields\": {\n \"parentLegalType\": [\n {\n \"count\": 3,\n \"value\": \"SP\"\n }\n ],\n \"parentStatus\": [\n {\n \"count\": 2,\n \"value\": \"HISTORICAL\"\n },\n {\n \"count\": 1,\n \"value\": \"ACTIVE\"\n }\n ],\n \"partyRoles\": [\n {\n \"count\": 3,\n \"value\": \"proprietor\"\n }\n ]\n }\n },\n \"searchResults\": {\n \"queryInfo\": {\n \"categories\": {\n \"parentLegalType\": \"\",\n \"parentStatus\": \"\",\n \"partyRoles\": [\n \"partner\",\n \"proprietor\"\n ]\n },\n \"query\": {\n \"parentBN\": \"\",\n \"parentIdentifier\": \"\",\n \"parentName\": \"\",\n \"partyName\": \"\",\n \"value\": \"test p\"\n },\n \"rows\": 100,\n \"start\": 0\n },\n \"results\": [\n {\n \"parentBN\": \"993558808\",\n \"parentIdentifier\": \"FM1000030\",\n \"parentLegalType\": \"SP\",\n \"parentName\": \"SOLO 1.5\",\n \"parentStatus\": \"HISTORICAL\",\n \"partyName\": \"TEST PROP PROP\",\n \"partyRoles\": [\n \"proprietor\"\n ],\n \"partyType\": \"person\"\n },\n {\n \"parentBN\": \"993539402\",\n \"parentIdentifier\": \"FM1000034\",\n \"parentLegalType\": \"SP\",\n \"parentName\": \"SOLO DIS 1.6\",\n \"parentStatus\": \"ACTIVE\",\n \"partyName\": \"TEST PROP PROP LNAME\",\n 
\"partyRoles\": [\n \"proprietor\"\n ],\n \"partyType\": \"person\"\n },\n {\n \"parentBN\": \"993538404\",\n \"parentIdentifier\": \"FM1000036\",\n \"parentLegalType\": \"SP\",\n \"parentName\": \"SOLO DIS SAVE 1.0\",\n \"parentStatus\": \"HISTORICAL\",\n \"partyName\": \"TEST PROP L PROP\",\n \"partyRoles\": [\n \"proprietor\"\n ],\n \"partyType\": \"person\"\n }\n ],\n \"totalResults\": 3\n }\n}" + "body": "{\n \"facets\": {\n \"fields\": {\n \"parentLegalType\": [\n {\n \"count\": 1,\n \"value\": \"GP\"\n }\n ],\n \"parentStatus\": [\n {\n \"count\": 1,\n \"value\": \"ACTIVE\"\n }\n ],\n \"partyRoles\": [\n {\n \"count\": 1,\n \"value\": \"partner\"\n }\n ]\n }\n },\n \"searchResults\": {\n \"queryInfo\": {\n \"categories\": {\n \"parentLegalType\": \"\",\n \"parentStatus\": \"\",\n \"partyRoles\": [\n \"partner\",\n \"proprietor\"\n ]\n },\n \"query\": {\n \"parentBN\": \"\",\n \"parentIdentifier\": \"\",\n \"parentName\": \"\",\n \"partyName\": \"\",\n \"value\": \"test\"\n },\n \"rows\": 10,\n \"start\": 0\n },\n \"results\": [\n {\n \"parentBN\": \"123456789BC0001\",\n \"parentIdentifier\": \"FM1234567\",\n \"parentLegalType\": \"GP\",\n \"parentName\": \"TEST ABCD, Bobby W Walaby\",\n \"parentStatus\": \"ACTIVE\",\n \"partyName\": \"TEST ABCD\",\n \"partyRoles\": [\n \"partner\"\n ],\n \"partyType\": \"organization\"\n }\n ],\n \"totalResults\": 1\n }\n}" } ] }, @@ -1002,12 +832,12 @@ "header": [ { "key": "Accept", - "type": "text", - "value": "application/json" + "value": "application/json", + "type": "text" } ], "url": { - "raw": "{{base_url}}/{{version}}/businesses/search/parties?query=value:test::parentName:solo&rows=100&categories=partyRoles:partner,proprietor::parentLegalType:SP,gp::parentStatus:active", + "raw": "{{base_url}}/{{version}}/businesses/search/parties?query=value:test::parentName:bobby&rows=100&categories=partyRoles:partner,proprietor::parentLegalType:SP,gp::parentStatus:active", "host": [ "{{base_url}}" ], @@ -1020,7 +850,7 @@ "query": 
[ { "key": "query", - "value": "value:test::parentName:solo" + "value": "value:test::parentName:bobby" }, { "key": "rows", @@ -1034,113 +864,34 @@ }, "description": "Get drafts example." }, - "response": [ + "response": [] + } + ] + }, + { + "name": "document-requests", + "item": [ + { + "name": "Create Document Access Request", + "event": [ { - "name": "parties refined", - "originalRequest": { - "method": "GET", - "header": [ - { - "key": "Accept", - "type": "text", - "value": "application/json" - } - ], - "url": { - "raw": "{{base_url}}/{{version}}/businesses/search/parties?query=value:test::parentName:solo&rows=100&categories=partyRoles:partner,proprietor::parentLegalType:SP,gp::parentStatus:active", - "host": [ - "{{base_url}}" - ], - "path": [ - "{{version}}", - "businesses", - "search", - "parties" - ], - "query": [ - { - "key": "query", - "value": "value:test::parentName:solo" - }, - { - "key": "rows", - "value": "100" - }, - { - "key": "categories", - "value": "partyRoles:partner,proprietor::parentLegalType:SP,gp::parentStatus:active" - } - ] - } - }, - "status": "OK", - "code": 200, - "_postman_previewlanguage": "json", - "header": [ - { - "key": "Server", - "value": "Werkzeug/2.1.2 Python/3.8.2" - }, - { - "key": "Date", - "value": "Wed, 27 Jul 2022 23:29:36 GMT" - }, - { - "key": "Content-Type", - "value": "application/json" - }, - { - "key": "Content-Length", - "value": "665" - }, - { - "key": "Access-Control-Allow-Origin", - "value": "*" - }, - { - "key": "API", - "value": "search_api/1.0.0b1.dev1" - }, - { - "key": "SCHEMAS", - "value": "registry_schemas/1.1.0" - }, - { - "key": "Connection", - "value": "close" - } - ], - "cookie": [], - "body": "{\n \"facets\": {\n \"fields\": {\n \"parentLegalType\": [\n {\n \"count\": 1,\n \"value\": \"SP\"\n }\n ],\n \"parentStatus\": [\n {\n \"count\": 1,\n \"value\": \"ACTIVE\"\n }\n ],\n \"partyRoles\": [\n {\n \"count\": 1,\n \"value\": \"proprietor\"\n }\n ]\n }\n },\n \"searchResults\": {\n \"queryInfo\": 
{\n \"categories\": {\n \"parentLegalType\": [\n \"SP\",\n \"gp\"\n ],\n \"parentStatus\": [\n \"active\"\n ],\n \"partyRoles\": [\n \"partner\",\n \"proprietor\"\n ]\n },\n \"query\": {\n \"parentBN\": \"\",\n \"parentIdentifier\": \"\",\n \"parentName\": \"solo\",\n \"partyName\": \"\",\n \"value\": \"test\"\n },\n \"rows\": 100,\n \"start\": 0\n },\n \"results\": [\n {\n \"parentBN\": \"993539402\",\n \"parentIdentifier\": \"FM1000034\",\n \"parentLegalType\": \"SP\",\n \"parentName\": \"SOLO DIS 1.6\",\n \"parentStatus\": \"ACTIVE\",\n \"partyName\": \"TEST PROP PROP LNAME\",\n \"partyRoles\": [\n \"proprietor\"\n ],\n \"partyType\": \"person\"\n }\n ],\n \"totalResults\": 1\n }\n}" - } - ] - } - ] - }, - { - "name": "document-requests", - "item": [ - { - "name": "Create Document Access Request", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "var jsonData = pm.response.json()\r", - "\r", - "pm.test(\"Status code is 201/Created\", function () {\r", - " pm.response.to.have.status(201);\r", - " \r", - " // success, so set keys\r", - " pm.environment.set(\"access_request_id\", jsonData.id)\r", - " pm.environment.set(\"document_key\", jsonData.documents[0].documentKey)\r", - "});\r", - "\r", - "pm.test('should return JSON', function () {\r", - " pm.response.to.have.header('Content-Type', 'application/json');\r", - "});\r", - "" + "listen": "test", + "script": { + "exec": [ + "var jsonData = pm.response.json()\r", + "\r", + "pm.test(\"Status code is 201/Created\", function () {\r", + " pm.response.to.have.status(201);\r", + " \r", + " // success, so set keys\r", + " pm.environment.set(\"access_request_id\", jsonData.id)\r", + " pm.environment.set(\"document_key\", jsonData.documents[0].documentKey)\r", + "});\r", + "\r", + "pm.test('should return JSON', function () {\r", + " pm.response.to.have.header('Content-Type', 'application/json');\r", + "});\r", + "" ], "type": "text/javascript" } @@ -2199,286 +1950,796 @@ "name": "internal", "item": [ { - 
"name": "business solr update", - "event": [ + "name": "update", + "item": [ { - "listen": "test", - "script": { - "exec": [ - "var jsonData = pm.response.json()\r", - "\r", - "pm.test(\"Status code is 200/OK\", function () {\r", - " pm.response.to.have.status(200); \r", - "});\r", - "\r", - "pm.test('should return JSON', function () {\r", - " pm.response.to.have.header('Content-Type', 'application/json');\r", - "});\r", - "" + "name": "business solr update", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "var jsonData = pm.response.json()\r", + "\r", + "pm.test(\"Status code is 202/Accepted\", function () {\r", + " pm.response.to.have.status(202); \r", + "});\r", + "\r", + "pm.test('should return JSON', function () {\r", + " pm.response.to.have.header('Content-Type', 'application/json');\r", + "});\r", + "" + ], + "type": "text/javascript", + "packages": {} + } + } + ], + "request": { + "auth": { + "type": "bearer", + "bearer": [ + { + "key": "token", + "value": "{{token}}", + "type": "string" + } + ] + }, + "method": "PUT", + "header": [ + { + "key": "Accept", + "value": "application/json", + "type": "text" + }, + { + "key": "Account-Id", + "value": "{{account_id}}", + "type": "text" + } ], - "type": "text/javascript" - } - } - ], - "request": { - "auth": { - "type": "bearer", - "bearer": [ - { - "key": "token", - "value": "{{token}}", - "type": "string" + "body": { + "mode": "raw", + "raw": "{\r\n \"business\":{\r\n \"goodStanding\": true,\r\n \"identifier\":\"CP1233338\",\r\n \"legalName\":\"Kyle stuffing tests\",\r\n \"legalType\":\"CP\",\r\n \"taxId\":\"113456789BC0001\",\r\n \"state\":\"ACTIVE\"\r\n }\r\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{internal_url}}/{{version}}/internal/solr/update", + "host": [ + "{{internal_url}}" + ], + "path": [ + "{{version}}", + "internal", + "solr", + "update" + ] } - ] - }, - "method": "PUT", - "header": [ - { - "key": "Accept", - "value": "application/json", - 
"type": "text" }, - { - "key": "Account-Id", - "value": "{{account_id}}", - "type": "text" - } - ], - "body": { - "mode": "raw", - "raw": "{\r\n \"business\":{\r\n \"goodStanding\": true,\r\n \"identifier\":\"BC1233334\",\r\n \"legalName\":\"ABCD EFGH Corp\",\r\n \"legalType\":\"BEN\",\r\n \"taxId\":\"123456789BC0001\",\r\n \"state\":\"ACTIVE\"\r\n }\r\n}", - "options": { - "raw": { - "language": "json" - } - } + "response": [] }, - "url": { - "raw": "{{internal_url}}/{{version}}/internal/solr/update", - "host": [ - "{{internal_url}}" - ], - "path": [ - "{{version}}", - "internal", - "solr", - "update" - ] - } - }, - "response": [] - }, - { - "name": "business + parties solr update", - "event": [ { - "listen": "test", - "script": { - "exec": [ - "var jsonData = pm.response.json()\r", - "\r", - "pm.test(\"Status code is 200/OK\", function () {\r", - " pm.response.to.have.status(200); \r", - "});\r", - "\r", - "pm.test('should return JSON', function () {\r", - " pm.response.to.have.header('Content-Type', 'application/json');\r", - "});\r", - "" + "name": "business + parties solr update", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "var jsonData = pm.response.json()\r", + "\r", + "pm.test(\"Status code is 202/Accepted\", function () {\r", + " pm.response.to.have.status(202); \r", + "});\r", + "\r", + "pm.test('should return JSON', function () {\r", + " pm.response.to.have.header('Content-Type', 'application/json');\r", + "});\r", + "" + ], + "type": "text/javascript", + "packages": {} + } + } + ], + "request": { + "auth": { + "type": "bearer", + "bearer": [ + { + "key": "token", + "value": "{{token}}", + "type": "string" + } + ] + }, + "method": "PUT", + "header": [ + { + "key": "Accept", + "value": "application/json", + "type": "text" + }, + { + "key": "Account-Id", + "value": "{{account_id}}", + "type": "text" + } ], - "type": "text/javascript" - } - } - ], - "request": { - "auth": { - "type": "bearer", - "bearer": [ - { - "key": "token", - 
"value": "{{token}}", - "type": "string" + "body": { + "mode": "raw", + "raw": "{\r\n \"business\":{\r\n \"identifier\":\"FM1234567\",\r\n \"legalName\":\"TEST Bob Partnership\",\r\n \"legalType\":\"GP\",\r\n \"taxId\":\"123456789BC0001\",\r\n \"state\":\"ACTIVE\"\r\n },\r\n \"parties\":[\r\n {\r\n \"officer\": {\r\n \"organizationName\":\"TEST ABCD\",\r\n \"partyType\":\"organization\",\r\n \"id\": 543\r\n },\r\n \"roles\":[\r\n {\r\n \"appointmentDate\": \"2000-01-04\",\r\n \"cessationDate\": null,\r\n \"roleType\": \"partner\"\r\n }\r\n ]\r\n },\r\n {\r\n \"officer\": {\r\n \"firstName\":\"Bobby\",\r\n \"lastName\": \"Walaby\",\r\n \"middleInitial\": \"W\",\r\n \"partyType\":\"person\",\r\n \"id\": 123\r\n },\r\n \"roles\":[\r\n {\r\n \"appointmentDate\": \"2000-01-04\",\r\n \"cessationDate\": null,\r\n \"roleType\": \"partner\"\r\n }\r\n ]\r\n }\r\n ]\r\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{base_url}}/{{version}}/internal/solr/update", + "host": [ + "{{base_url}}" + ], + "path": [ + "{{version}}", + "internal", + "solr", + "update" + ] } - ] - }, - "method": "PUT", - "header": [ - { - "key": "Accept", - "value": "application/json", - "type": "text" }, - { - "key": "Account-Id", - "value": "{{account_id}}", - "type": "text" - } - ], - "body": { - "mode": "raw", - "raw": "{\r\n \"business\":{\r\n \"alternateNames\": [\r\n {\r\n \"entityType\": \"GP\",\r\n \"identifier\": \"FM1234567\",\r\n \"nameRegisteredDate\": \"2022-08-15T08:00:00+00:00\",\r\n \"nameStartDate\": \"2022-08-16\",\r\n \"operatingName\": \"Test update parties\"\r\n }\r\n ],\r\n \"identifier\":\"FM1234567\",\r\n \"legalName\":\"TEST ABCD, Bobby W Walaby\",\r\n \"legalType\":\"GP\",\r\n \"taxId\":\"123456789BC0001\",\r\n \"state\":\"ACTIVE\"\r\n },\r\n \"parties\":[\r\n {\r\n \"officer\": {\r\n \"organizationName\":\"TEST ABCD\",\r\n \"partyType\":\"organization\"\r\n },\r\n \"roles\":[\r\n {\r\n \"appointmentDate\": \"2000-01-04\",\r\n 
\"cessationDate\": null,\r\n \"roleType\": \"partner\"\r\n }\r\n ]\r\n },\r\n {\r\n \"officer\": {\r\n \"firstName\":\"Bobby\",\r\n \"lastName\": \"Walaby\",\r\n \"middleInitial\": \"W\",\r\n \"partyType\":\"person\"\r\n },\r\n \"roles\":[\r\n {\r\n \"appointmentDate\": \"2000-01-04\",\r\n \"cessationDate\": null,\r\n \"roleType\": \"partner\"\r\n }\r\n ]\r\n }\r\n ]\r\n}", - "options": { - "raw": { - "language": "json" - } - } + "response": [] }, - "url": { - "raw": "{{base_url}}/{{version}}/internal/solr/update", - "host": [ - "{{base_url}}" + { + "name": "sync", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "var jsonData = pm.response.json()\r", + "\r", + "pm.test(\"Status code is 200/OK\", function () {\r", + " pm.response.to.have.status(200); \r", + "});\r", + "\r", + "pm.test('should return JSON', function () {\r", + " pm.response.to.have.header('Content-Type', 'application/json');\r", + "});\r", + "" + ], + "type": "text/javascript" + } + } ], - "path": [ - "{{version}}", - "internal", - "solr", - "update" - ] + "protocolProfileBehavior": { + "disableBodyPruning": true + }, + "request": { + "auth": { + "type": "bearer", + "bearer": [ + { + "key": "token", + "value": "{{token}}", + "type": "string" + } + ] + }, + "method": "GET", + "header": [ + { + "key": "Accept", + "value": "application/json", + "type": "text" + } + ], + "body": { + "mode": "raw", + "raw": "", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{internal_url}}/{{version}}/internal/solr/update/sync", + "host": [ + "{{internal_url}}" + ], + "path": [ + "{{version}}", + "internal", + "solr", + "update", + "sync" + ] + } + }, + "response": [] + }, + { + "name": "sync heartbeat", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "var jsonData = pm.response.json()\r", + "\r", + "pm.test(\"Status code is 200/OK\", function () {\r", + " pm.response.to.have.status(200); \r", + "});\r", + "\r", + "pm.test('should return JSON', function 
() {\r", + " pm.response.to.have.header('Content-Type', 'application/json');\r", + "});\r", + "" + ], + "type": "text/javascript" + } + } + ], + "protocolProfileBehavior": { + "disableBodyPruning": true + }, + "request": { + "auth": { + "type": "bearer", + "bearer": [ + { + "key": "token", + "value": "{{token}}", + "type": "string" + } + ] + }, + "method": "GET", + "header": [ + { + "key": "Accept", + "value": "application/json", + "type": "text" + } + ], + "body": { + "mode": "raw", + "raw": "", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{internal_url}}/{{version}}/internal/solr/update/sync/heartbeat", + "host": [ + "{{internal_url}}" + ], + "path": [ + "{{version}}", + "internal", + "solr", + "update", + "sync", + "heartbeat" + ] + } + }, + "response": [] + }, + { + "name": "resync mins offset", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "var jsonData = pm.response.json()\r", + "\r", + "pm.test(\"Status code is 200/OK\", function () {\r", + " pm.response.to.have.status(201); \r", + "});\r", + "\r", + "pm.test('should return JSON', function () {\r", + " pm.response.to.have.header('Content-Type', 'application/json');\r", + "});\r", + "" + ], + "type": "text/javascript" + } + } + ], + "request": { + "auth": { + "type": "bearer", + "bearer": [ + { + "key": "token", + "value": "{{token}}", + "type": "string" + } + ] + }, + "method": "POST", + "header": [ + { + "key": "Accept", + "value": "application/json", + "type": "text" + }, + { + "key": "Account-Id", + "value": "{{account_id}}", + "type": "text" + } + ], + "body": { + "mode": "raw", + "raw": "{\r\n \"minutesOffset\": 60\r\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{base_url}}/{{version}}/internal/solr/update/resync", + "host": [ + "{{base_url}}" + ], + "path": [ + "{{version}}", + "internal", + "solr", + "update", + "resync" + ] + } + }, + "response": [] + }, + { + "name": "resync identifiers", + "event": [ + 
{ + "listen": "test", + "script": { + "exec": [ + "var jsonData = pm.response.json()\r", + "\r", + "pm.test(\"Status code is 200/OK\", function () {\r", + " pm.response.to.have.status(201); \r", + "});\r", + "\r", + "pm.test('should return JSON', function () {\r", + " pm.response.to.have.header('Content-Type', 'application/json');\r", + "});\r", + "" + ], + "type": "text/javascript", + "packages": {} + } + } + ], + "request": { + "auth": { + "type": "bearer", + "bearer": [ + { + "key": "token", + "value": "{{token}}", + "type": "string" + } + ] + }, + "method": "POST", + "header": [ + { + "key": "Accept", + "value": "application/json", + "type": "text" + }, + { + "key": "Account-Id", + "value": "{{account_id}}", + "type": "text" + } + ], + "body": { + "mode": "raw", + "raw": "{\r\n \"identifiers\": [\"CP1233338\"]\r\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{base_url}}/{{version}}/internal/solr/update/resync", + "host": [ + "{{base_url}}" + ], + "path": [ + "{{version}}", + "internal", + "solr", + "update", + "resync" + ] + } + }, + "response": [] } - }, - "response": [] + ] }, { - "name": "resync mins offset", - "event": [ + "name": "command", + "item": [ { - "listen": "test", - "script": { - "exec": [ - "var jsonData = pm.response.json()\r", - "\r", - "pm.test(\"Status code is 200/OK\", function () {\r", - " pm.response.to.have.status(201); \r", - "});\r", - "\r", - "pm.test('should return JSON', function () {\r", - " pm.response.to.have.header('Content-Type', 'application/json');\r", - "});\r", - "" + "name": "backup", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "var jsonData = pm.response.json()\r", + "\r", + "pm.test(\"Status code is 200/OK\", function () {\r", + " pm.response.to.have.status(200); \r", + "});\r", + "\r", + "pm.test('should return JSON', function () {\r", + " pm.response.to.have.header('Content-Type', 'application/json');\r", + "});\r", + "" + ], + "type": "text/javascript", + 
"packages": {} + } + } + ], + "request": { + "auth": { + "type": "bearer", + "bearer": [ + { + "key": "token", + "value": "{{token}}", + "type": "string" + } + ] + }, + "method": "POST", + "header": [ + { + "key": "Accept", + "value": "application/json", + "type": "text" + }, + { + "key": "Account-Id", + "value": "{{account_id}}", + "type": "text" + } ], - "type": "text/javascript" - } - } - ], - "request": { - "auth": { - "type": "bearer", - "bearer": [ - { - "key": "token", - "value": "{{token}}", - "type": "string" + "body": { + "mode": "raw", + "raw": "{\r\n \"command\": \"backup\"\r\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{internal_url}}/{{version}}/internal/solr/command", + "host": [ + "{{internal_url}}" + ], + "path": [ + "{{version}}", + "internal", + "solr", + "command" + ] } - ] - }, - "method": "POST", - "header": [ - { - "key": "Accept", - "value": "application/json", - "type": "text" }, - { - "key": "Account-Id", - "value": "{{account_id}}", - "type": "text" - } - ], - "body": { - "mode": "raw", - "raw": "{\r\n \"minutesOffset\": 60\r\n}", - "options": { - "raw": { - "language": "json" + "response": [] + }, + { + "name": "restore", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "var jsonData = pm.response.json()\r", + "\r", + "pm.test(\"Status code is 200/OK\", function () {\r", + " pm.response.to.have.status(200); \r", + "});\r", + "\r", + "pm.test('should return JSON', function () {\r", + " pm.response.to.have.header('Content-Type', 'application/json');\r", + "});\r", + "" + ], + "type": "text/javascript", + "packages": {} + } } - } + ], + "request": { + "auth": { + "type": "bearer", + "bearer": [ + { + "key": "token", + "value": "{{token}}", + "type": "string" + } + ] + }, + "method": "POST", + "header": [ + { + "key": "Accept", + "value": "application/json", + "type": "text" + }, + { + "key": "Account-Id", + "value": "{{account_id}}", + "type": "text" + } + ], + "body": { + "mode": 
"raw", + "raw": "{\r\n \"command\": \"restore\"\r\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{internal_url}}/{{version}}/internal/solr/command", + "host": [ + "{{internal_url}}" + ], + "path": [ + "{{version}}", + "internal", + "solr", + "command" + ] + } + }, + "response": [] }, - "url": { - "raw": "{{base_url}}/{{version}}/internal/solr/update/resync", - "host": [ - "{{base_url}}" + { + "name": "restore status", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "var jsonData = pm.response.json()\r", + "\r", + "pm.test(\"Status code is 200/OK\", function () {\r", + " pm.response.to.have.status(200); \r", + "});\r", + "\r", + "pm.test('should return JSON', function () {\r", + " pm.response.to.have.header('Content-Type', 'application/json');\r", + "});\r", + "" + ], + "type": "text/javascript", + "packages": {} + } + } ], - "path": [ - "{{version}}", - "internal", - "solr", - "update", - "resync" - ] + "request": { + "auth": { + "type": "bearer", + "bearer": [ + { + "key": "token", + "value": "{{token}}", + "type": "string" + } + ] + }, + "method": "POST", + "header": [ + { + "key": "Accept", + "value": "application/json", + "type": "text" + }, + { + "key": "Account-Id", + "value": "{{account_id}}", + "type": "text" + } + ], + "body": { + "mode": "raw", + "raw": "{\r\n \"command\": \"restorestatus\"\r\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{internal_url}}/{{version}}/internal/solr/command", + "host": [ + "{{internal_url}}" + ], + "path": [ + "{{version}}", + "internal", + "solr", + "command" + ] + } + }, + "response": [] } - }, - "response": [] + ] }, { - "name": "resync identifiers", - "event": [ + "name": "import", + "item": [ { - "listen": "test", - "script": { - "exec": [ - "var jsonData = pm.response.json()\r", - "\r", - "pm.test(\"Status code is 200/OK\", function () {\r", - " pm.response.to.have.status(201); \r", - "});\r", - "\r", - "pm.test('should 
return JSON', function () {\r", - " pm.response.to.have.header('Content-Type', 'application/json');\r", - "});\r", - "" + "name": "businesses", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "var jsonData = pm.response.json()\r", + "\r", + "pm.test(\"Status code is 201/CREATED\", function () {\r", + " pm.response.to.have.status(201); \r", + "});\r", + "\r", + "pm.test('should return JSON', function () {\r", + " pm.response.to.have.header('Content-Type', 'application/json');\r", + "});\r", + "" + ], + "type": "text/javascript", + "packages": {} + } + } + ], + "request": { + "auth": { + "type": "bearer", + "bearer": [ + { + "key": "token", + "value": "{{token}}", + "type": "string" + } + ] + }, + "method": "PUT", + "header": [ + { + "key": "Accept", + "value": "application/json", + "type": "text" + }, + { + "key": "Account-Id", + "value": "{{account_id}}", + "type": "text" + } ], - "type": "text/javascript" - } - } - ], - "request": { - "auth": { - "type": "bearer", - "bearer": [ - { - "key": "token", - "value": "{{token}}", - "type": "string" + "body": { + "mode": "raw", + "raw": "{\r\n \"businesses\":[\r\n {\r\n \"id\":\"BC1234567\",\r\n \"identifier\": \"BC1234567\",\r\n \"legalType\": \"BEN\",\r\n \"name\": \"Test Benefit Company\",\r\n \"status\": \"ACTIVE\",\r\n \"goodStanding\": \"true\",\r\n \"bn\": \"123456789BC0001\",\r\n \"parties\": [\r\n {\r\n \"id\": \"BC1234567_1245\",\r\n \"parentLegalType\": \"BEN\",\r\n \"parentIdentifier\": \"BC1234567\",\r\n \"parentName\": \"Test Benefit Company\",\r\n \"parentStatus\": \"ACTIVE\",\r\n \"partyName\": \"Joe Solver\",\r\n \"partyRoles\": [\"significant individual\"],\r\n \"partyType\": \"person\",\r\n \"parentBN\": \"123456789BC0001\"\r\n },\r\n {\r\n \"id\": \"BC1234567_1246\",\r\n \"parentLegalType\": \"BEN\",\r\n \"parentIdentifier\": \"BC1234567\",\r\n \"parentName\": \"Test Benefit Company\",\r\n \"parentStatus\": \"ACTIVE\",\r\n \"partyName\": \"Willow Walaby\",\r\n \"partyRoles\": 
[\"significant individual\"],\r\n \"partyType\": \"person\",\r\n \"parentBN\": \"123456789BC0001\"\r\n }\r\n ]\r\n },\r\n {\r\n \"id\":\"CP987654321\",\r\n \"identifier\": \"CP987654321\",\r\n \"legalType\": \"CP\",\r\n \"name\": \"Tested Coop\",\r\n \"status\": \"ACTIVE\",\r\n \"goodStanding\": \"false\"\r\n },\r\n {\r\n \"id\":\"FM1234568\",\r\n \"identifier\": \"FM1234568\",\r\n \"legalType\": \"GP\",\r\n \"name\": \"Test Partnership\",\r\n \"status\": \"ACTIVE\",\r\n \"bn\": \"123456788BC0001\",\r\n \"parties\": [\r\n {\r\n \"id\": \"FM1234568_12355\",\r\n \"parentLegalType\": \"GP\",\r\n \"parentIdentifier\": \"FM1234568\",\r\n \"parentName\": \"Test Partnership\",\r\n \"parentStatus\": \"ACTIVE\",\r\n \"partyName\": \"Kyle Smile\",\r\n \"partyRoles\": [\"partner\"],\r\n \"partyType\": \"person\",\r\n \"parentBN\": \"123456788BC0001\"\r\n },\r\n {\r\n \"id\": \"FM1234568_12366\",\r\n \"parentLegalType\": \"GP\",\r\n \"parentIdentifier\": \"FM1234568\",\r\n \"parentName\": \"Test Partnership\",\r\n \"parentStatus\": \"ACTIVE\",\r\n \"partyName\": \"David Mortar\",\r\n \"partyRoles\": [\"partner\"],\r\n \"partyType\": \"person\",\r\n \"parentBN\": \"123456788BC0001\"\r\n }\r\n ]\r\n }\r\n ],\r\n \"timeout\":\"45\"\r\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{internal_url}}/{{version}}/internal/solr/import", + "host": [ + "{{internal_url}}" + ], + "path": [ + "{{version}}", + "internal", + "solr", + "import" + ] } - ] - }, - "method": "POST", - "header": [ - { - "key": "Accept", - "value": "application/json", - "type": "text" }, - { - "key": "Account-Id", - "value": "{{account_id}}", - "type": "text" - } - ], - "body": { - "mode": "raw", - "raw": "{\r\n \"identifiers\": [\"BC0871330\"]\r\n}", - "options": { - "raw": { - "language": "json" - } - } + "response": [] }, - "url": { - "raw": "{{base_url}}/{{version}}/internal/solr/update/resync", - "host": [ - "{{base_url}}" + { + "name": "partial party", + "event": [ + { + 
"listen": "test", + "script": { + "exec": [ + "var jsonData = pm.response.json()\r", + "\r", + "pm.test(\"Status code is 201/CREATED\", function () {\r", + " pm.response.to.have.status(201); \r", + "});\r", + "\r", + "pm.test('should return JSON', function () {\r", + " pm.response.to.have.header('Content-Type', 'application/json');\r", + "});\r", + "" + ], + "type": "text/javascript", + "packages": {} + } + } ], - "path": [ - "{{version}}", - "internal", - "solr", - "update", - "resync" - ] + "request": { + "auth": { + "type": "bearer", + "bearer": [ + { + "key": "token", + "value": "{{token}}", + "type": "string" + } + ] + }, + "method": "PUT", + "header": [ + { + "key": "Accept", + "value": "application/json" + }, + { + "key": "Account-Id", + "value": "{{account_id}}" + } + ], + "body": { + "mode": "raw", + "raw": "{\r\n \"type\": \"partial\",\r\n \"businesses\": [\r\n {\r\n \"id\": \"BC1234567\",\r\n \"parties\": {\r\n \"add\": [\r\n {\r\n \"id\": \"BC1234567_12457\",\r\n \"partyName\": \"Ryan Wills\",\r\n \"partyRoles\": [\"significant individual\"],\r\n \"partyType\": \"person\"\r\n },\r\n {\r\n \"id\": \"BC1234567_124590\",\r\n \"partyName\": \"Filips Wills\",\r\n \"partyRoles\": [\"significant individual\"],\r\n \"partyType\": \"person\"\r\n }\r\n ]\r\n }\r\n }\r\n ]\r\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{internal_url}}/{{version}}/internal/solr/import", + "host": [ + "{{internal_url}}" + ], + "path": [ + "{{version}}", + "internal", + "solr", + "import" + ] + } + }, + "response": [] } - }, - "response": [] + ] } ] } diff --git a/search-api/tests/unit/api/businesses/test_search.py b/search-api/tests/unit/api/businesses/test_search.py deleted file mode 100644 index 31515f24..00000000 --- a/search-api/tests/unit/api/businesses/test_search.py +++ /dev/null @@ -1,270 +0,0 @@ -# Copyright © 2022 Province of British Columbia -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use 
this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Test-Suite to ensure that the search endpoints/functions work as expected.""" -import time -from dataclasses import asdict -from http import HTTPStatus - -import pytest -from flask import current_app, Flask - -from search_api.request_handlers import business_search, business_suggest, parties_search -from search_api.request_handlers.search import SearchParams -from search_api.services import search_solr -from search_api.services.solr import Solr -from search_api.services.solr.solr_fields import SolrField - -from tests import integration_solr -from tests.unit.services.test_solr import create_solr_doc, SOLR_TEST_DOCS - - -@pytest.mark.parametrize('test_name,query,mocked_terms,expected', [ - ('test-identifier', 'CP00', ['CP0034567'], ['CP0034567']), -]) -def test_business_suggest_identifier(session, client, requests_mock, test_name, query, mocked_terms, expected): - """Assert that solr business suggest call works as expected.""" - # setup solr mock - mocked_docs = [asdict(create_solr_doc(x, 'test doc', 'ACTIVE', 'BEN')) for x in mocked_terms] - requests_mock.get(f"{current_app.config.get('SOLR_SVC_URL')}/search/suggest", json={}) - requests_mock.get(f"{current_app.config.get('SOLR_SVC_URL')}/search/query?q=({SolrField.NAME_SINGLE.value}%3A{query})", json={}) - requests_mock.get( - f"{current_app.config.get('SOLR_SVC_URL')}/search/query?q={SolrField.IDENTIFIER_Q.value}%3A{query} " + - f'OR {SolrField.BN_Q.value}:{query}', json={'response': {'docs': mocked_docs}}) - # call select - suggestions = 
business_suggest(query, True, None) - # test - assert len(suggestions) == len(expected) - for suggestion in suggestions: - assert suggestion['value'] in expected - - -@pytest.mark.parametrize('test_name,query,mocked_terms,expected', [ - ('test-bn', '0012334', ['BN00012334'], ['BN00012334']), -]) -def test_business_suggest_bn(session, client, requests_mock, test_name, query, mocked_terms, expected): - """Assert that solr business suggest call works as expected.""" - # setup solr mock - mocked_docs = [asdict(create_solr_doc('BC1234567', 'test doc', 'ACTIVE', 'BEN', x)) for x in mocked_terms] - requests_mock.get(f"{current_app.config.get('SOLR_SVC_URL')}/search/suggest", json={}) - requests_mock.get(f"{current_app.config.get('SOLR_SVC_URL')}/search/query?q=({SolrField.NAME_SINGLE.value}:{query})", json={}) - requests_mock.get( - f"{current_app.config.get('SOLR_SVC_URL')}/search/query?q={SolrField.IDENTIFIER_Q.value}:{query} " + - f'OR {SolrField.BN_Q.value}:{query}', json={'response': {'docs': mocked_docs}}) - # call select - suggestions = business_suggest(query, True, None) - # test - assert len(suggestions) == len(expected) - for suggestion in suggestions: - assert suggestion['value'] in expected - - -@pytest.mark.parametrize('test_name,query,mocked_terms,expected', [ - ('test-name', 'test 2222', ['TEST 2222', 'TESTERS 2222156'], ['TEST 2222', 'TESTERS 2222156']), -]) -def test_business_suggest_name(session, client, requests_mock, test_name, query, mocked_terms, expected): - """Assert that solr business suggest call works as expected.""" - # setup solr mock - mocked_docs = [asdict(create_solr_doc('BC1234567', x, 'ACTIVE', 'BEN')) for x in mocked_terms] - requests_mock.get(f"{current_app.config.get('SOLR_SVC_URL')}/search/suggest", json={'suggest': {'name': {query: {'suggestions': [{'term': mocked_terms[0]}]}}}}) - requests_mock.get(f"{current_app.config.get('SOLR_SVC_URL')}/search/query?q=({SolrField.NAME_SINGLE.value}:{query.split()[0]})", json={'response': 
{'docs': [mocked_docs[1]]}}) - requests_mock.get( - f"{current_app.config.get('SOLR_SVC_URL')}/search/query?q={SolrField.IDENTIFIER_Q.value}:{query} " + - f'OR {SolrField.BN_Q.value}:{query}', json={'response': {'docs': []}}) - # call select - suggestions = business_suggest(query, True, None) - # test - assert len(suggestions) == len(expected) - for suggestion in suggestions: - assert suggestion['value'] in expected - - -@pytest.mark.parametrize('test_name,query,mock_names,mock_ids,mock_bns,expected', [ - ('test-bn-identifier-name', '123', ['TEST 1234'], ['CP1234567'], ['BN00012334'], ['TEST 1234', 'CP1234567', 'BN00012334']), -]) -def test_business_suggest_all(session, client, requests_mock, test_name, query, mock_names, mock_ids, mock_bns, expected): - """Assert that search business suggest call works as expected.""" - # setup solr mock - mocked_name_docs = [asdict(create_solr_doc('BC0024562', x, 'ACTIVE', 'BEN')) for x in mock_names] - mocked_identifier_docs = [asdict(create_solr_doc(x, 'test identifier match', 'ACTIVE', 'BEN')) for x in mock_ids] - mocked_bn_docs = [asdict(create_solr_doc('BC0004567', 'test bn match', 'ACTIVE', 'BEN', x)) for x in mock_bns] - - requests_mock.get(f"{current_app.config.get('SOLR_SVC_URL')}/search/suggest", json={'suggest': {'name': {query: {'suggestions': []}}}}) - requests_mock.get(f"{current_app.config.get('SOLR_SVC_URL')}/search/query?q=({SolrField.NAME_SINGLE.value}:{query})", json={'response': {'docs': mocked_name_docs}}) - requests_mock.get( - f"{current_app.config.get('SOLR_SVC_URL')}/search/query?q={SolrField.IDENTIFIER_Q.value}:{query} " + - f'OR {SolrField.BN_Q.value}:{query}', json={'response': {'docs': mocked_identifier_docs + mocked_bn_docs}}) - # call select - suggestions = business_suggest(query, True, None) - # test - assert len(suggestions) == len(expected) - for suggestion in suggestions: - assert suggestion['value'] in expected - - 
-@pytest.mark.parametrize('test_name,query,mock_names,mock_ids,mock_bns,expected', [ - ('test-bus-search', - '123', - [asdict(x) for x in SOLR_TEST_DOCS[:2]], - [asdict(x) for x in SOLR_TEST_DOCS[2:4]], - [asdict(x) for x in SOLR_TEST_DOCS[4:5]], - [asdict(x) for x in SOLR_TEST_DOCS[:5]]), -]) -def test_business_search(session, client, requests_mock, test_name, query, mock_names, mock_ids, mock_bns, expected): - """Assert that search business search call works as expected.""" - # setup solr mock - num_found = len(expected) - requests_mock.get(f"{current_app.config.get('SOLR_SVC_URL')}/search/query", json={'response': {'docs': mock_names + mock_ids + mock_bns, 'numFound': num_found, 'start': 0}}) - # call select - params = SearchParams({'value': query}, None, None) - results = business_search(params) - # test - assert results['response']['docs'] == expected - assert results['response']['numFound'] == num_found - assert results['response']['start'] == 0 - - -@pytest.mark.parametrize('test_name,query,mock_docs', [ - ('test-party-search', - '1', - [asdict(x) for x in SOLR_TEST_DOCS[8:10]]), -]) -def test_parties_search(session, client, requests_mock, test_name, query, mock_docs): - """Assert that search parties search call works as expected.""" - # setup solr mock - parties_docs = [] - for doc in mock_docs: - parties_docs += doc['parties'] - num_found = len(parties_docs) - requests_mock.get(f"{current_app.config.get('SOLR_SVC_URL')}/search/query", json={'response': {'docs': parties_docs, 'numFound': num_found, 'start': 0}}) - # call select - params = SearchParams({'value': query}, None, None) - results = parties_search(params) - # test - assert results['response']['docs'] == parties_docs - assert results['response']['numFound'] == num_found - assert results['response']['start'] == 0 - - -@pytest.mark.parametrize('test_name,query,mocks,highlight,expected', [ - ('test-single-result', '123', ['123 test name'], False, [{'type': SolrField.NAME.value, 'value': '123 TEST 
NAME'}]), - ('test-single-result-highlight', '123', ['123 test name'], True, [{'type': SolrField.NAME.value, 'value': '123 TEST NAME'}]), - ('test-two-results', '123', ['123 test name', 'BC0001234'], False, [{'type': SolrField.NAME.value, 'value': '123 TEST NAME'}, {'type': SolrField.IDENTIFIER.value, 'value': 'BC0001234'}]), - ('test-two-results-highlight', '123', ['123 test name', 'BC0001234'], True, [{'type': SolrField.NAME.value, 'value': '123 TEST NAME'}, {'type': SolrField.IDENTIFIER.value, 'value': 'BC0001234'}]), - ('test-three-results', '123', ['123 test name', 'BC0001234', '123456789BC0001'], False, [{'type': SolrField.NAME.value, 'value': '123 TEST NAME'}, {'type': SolrField.IDENTIFIER.value, 'value': 'BC0001234'}, {'type': SolrField.BN.value, 'value': '123456789BC0001'}]), - ('test-three-results-highlight', '123', ['123 test name', 'BC0001234', '123456789BC0001'], True, [{'type': SolrField.NAME.value, 'value': '123 TEST NAME'}, {'type': SolrField.IDENTIFIER.value, 'value': 'BC0001234'}, {'type': SolrField.BN.value, 'value': '123456789BC0001'}]), -]) -def test_endpoint_suggest(session, client, requests_mock, test_name, query, mocks, highlight, expected): - """Assert that search suggest endpoint works as expected.""" - # setup mock - need to add more here if max_results > 1 - requests_mock.get(f"{current_app.config.get('SOLR_SVC_URL')}/search/suggest", json={'suggest': {'name': {query: {'suggestions': [{'term': mocks[0]}]}}}}) - if len(mocks) > 2: - requests_mock.get(f"{current_app.config.get('SOLR_SVC_URL')}/search/query", json={'response': {'docs': [{SolrField.IDENTIFIER.value: mocks[1]}, {SolrField.IDENTIFIER.value: '', SolrField.BN.value: mocks[2]}]}}) - elif len(mocks) > 1: - requests_mock.get(f"{current_app.config.get('SOLR_SVC_URL')}/search/query", json={'response': {'docs': [{SolrField.IDENTIFIER.value: mocks[1]}]}}) - # call endpoint - url = f'/api/v1/businesses/search/suggest?query={query}&rows={len(mocks)}' - if highlight: - url += 
f'&highlight={highlight}' - resp = client.get(url) - # check response - assert resp.status_code == HTTPStatus.OK - assert resp.json['results'] == expected - - -@pytest.mark.parametrize('test_name,query_params,mock_names,mock_ids,mock_bns,expected_docs', [ - ('test_facets', - {'query': '123', 'start': 0, 'rows': 5}, - [asdict(x) for x in SOLR_TEST_DOCS[:2]], - [asdict(x) for x in SOLR_TEST_DOCS[2:4]], - [asdict(x) for x in SOLR_TEST_DOCS[4:5]], - [asdict(x) for x in SOLR_TEST_DOCS[:5]]), - ('test_special_chars_only', {'query': '`~!@#$%^*()_-={}[]\\|', 'start': 0, 'rows': 10}, [], [], [], []), - ('test_:_only', {'query': ':test', 'start': 0, 'rows': 10}, [], [], [], []), - ('test_start_with_:', {'query': ':test', 'start': 0, 'rows': 10}, [], [], [], []) -]) -def test_endpoint_facets(session, client, requests_mock, test_name, query_params, mock_names, mock_ids, mock_bns, expected_docs): - """Assert that search facets endpoint works as expected.""" - # setup mock - num_found = len(expected_docs) - facets_mock = {'facet_counts': {'facet_fields': {SolrField.TYPE.value: ['BEN', 23, 'CP', 10, 'SP', 102], SolrField.STATE.value: ['ACTIVE', 23, 'HISTORICAL', 10]}}} - requests_mock.get(f"{current_app.config.get('SOLR_SVC_URL')}/search/query", json={'response': {'docs': mock_names + mock_ids + mock_bns, 'numFound': num_found, 'start': 0}, **facets_mock}) - # call endpoint - query = query_params['query'] - start = query_params['start'] - rows = query_params['rows'] - resp = client.get(f'/api/v1/businesses/search/facets?query=value:{query}&start={start}&rows={rows}') - # check response - assert resp.status_code == HTTPStatus.OK - if expected_docs: - assert resp.json['facets'] == Solr.parse_facets(facets_mock) - assert resp.json['searchResults']['queryInfo']['rows'] == rows - assert resp.json['searchResults']['queryInfo']['query']['value'] == query or query[0] == ':' - assert resp.json['searchResults']['queryInfo']['start'] == start - assert 
resp.json['searchResults']['totalResults'] == num_found - assert resp.json['searchResults']['results'] == expected_docs - - -@pytest.mark.parametrize('test_name,query_params,mock_docs', [ - ('test_parties', {'query': '1', 'start': 0, 'rows': 5}, [asdict(x) for x in SOLR_TEST_DOCS[8:10]]), - ('test_parties_:_only', {'query': ':', 'start': 0, 'rows': 5}, []), - ('test_parties_start_with_:', {'query': ':test', 'start': 0, 'rows': 5}, []), -]) -def test_endpoint_parties(session, client, requests_mock, test_name, query_params, mock_docs): - """Assert that search parties endpoint works as expected.""" - # setup mock - parties_docs = [] - for doc in mock_docs: - parties_docs += doc['parties'] - num_found = len(parties_docs) - facets_mock = {'facet_counts': {'facet_fields': {SolrField.TYPE.value: ['SP', 2, 'GP', 10], SolrField.STATE.value: ['ACTIVE', 7, 'HISTORICAL', 5], SolrField.PARTY_ROLE.value: ['proprietor', 2, 'partner', 13]}}} - requests_mock.get(f"{current_app.config.get('SOLR_SVC_URL')}/search/query", json={'response': {'docs': parties_docs, 'numFound': num_found, 'start': 0}, **facets_mock}) - # call endpoint - query = query_params['query'] - start = query_params['start'] - rows = query_params['rows'] - resp = client.get(f'/api/v1/businesses/search/parties?query=value:{query}&start={start}&rows={rows}&categories=partyRoles:partner,proprietor') - # check response - assert resp.status_code == HTTPStatus.OK - assert resp.json['facets'] == Solr.parse_facets(facets_mock) - assert resp.json['searchResults']['queryInfo']['rows'] == rows - assert resp.json['searchResults']['queryInfo']['query']['value'] == query or query[0] == ':' - assert resp.json['searchResults']['queryInfo']['start'] == start - assert resp.json['searchResults']['queryInfo']['categories']['partyRoles'] == ['partner', 'proprietor'] - assert resp.json['searchResults']['totalResults'] == num_found - assert resp.json['searchResults']['results'] == parties_docs - - -@integration_solr 
-@pytest.mark.parametrize('test_name,doc_name,path,endpoint,match', [ - ('test_bus_search_value', 'Test basic query', 'facets?query=value:basic', 'facets', True), - ('test_bus_search_name', 'Test names filter', 'facets?query=value:test::name:names', 'facets', True), - ('test_bus_search_name_no_match', 'Test name filter no match', 'facets?query=value:test::name:names', 'facets', False), - ('test_bus_search_name_&', 'Test name filter & match', 'facets?query=value:test::name:filter&', 'facets', True), - ('test_party_search_value', 'Test basic party query', 'parties?query=value:party&categories=partyRoles:partner,proprietor', 'parties', True), - ('test_party_search_owner_name', 'Test party owner name filter query', 'parties?query=value:party::partyName:person owner name filter&categories=partyRoles:partner,proprietor', 'parties', True), - ('test_party_search_parent_name', 'Test party parent name filter query', 'parties?query=value:party::parentName:name filter&categories=partyRoles:partner,proprietor', 'parties', True), -]) -def test_endpoint_full_integration(app, session, client, test_name, doc_name, path, endpoint, match): - """Assert that search endpoints work as expected.""" - search_solr.init_app(app) - search_solr.delete_all_docs() - search_solr.create_or_replace_docs([create_solr_doc('FM0000001', doc_name, 'ACTIVE', 'SP', '123', [(f'person {doc_name}', 'proprietor', 'person')])]) - time.sleep(2) # wait for solr to register update - resp = client.get(f'/api/v1/businesses/search/{path}') - - assert resp.status_code == HTTPStatus.OK - assert resp.json['searchResults']['totalResults'] == (1 if match else 0) - validation_field = 'name' if endpoint == 'facets' else 'parentName' - if match: - assert resp.json['searchResults']['results'][0][validation_field] == doc_name diff --git a/search-api/tests/unit/api/internal/__init__.py b/search-api/tests/unit/api/internal/__init__.py index 090d1744..d8711089 100644 --- a/search-api/tests/unit/api/internal/__init__.py +++ 
b/search-api/tests/unit/api/internal/__init__.py @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Test-Suite for the API.""" +"""Test-Suite for the internal API.""" diff --git a/search-api/src/search_api/services/solr/__init__.py b/search-api/tests/unit/api/internal/solr/__init__.py similarity index 78% rename from search-api/src/search_api/services/solr/__init__.py rename to search-api/tests/unit/api/internal/solr/__init__.py index 1a77da90..ddbdc588 100644 --- a/search-api/src/search_api/services/solr/__init__.py +++ b/search-api/tests/unit/api/internal/solr/__init__.py @@ -1,4 +1,4 @@ -# Copyright © 2022 Province of British Columbia +# Copyright © 2024 Province of British Columbia # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,5 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""This module wraps the solr classes/fields for using registries search solr.""" -from .solr import Solr +"""Test-Suite for the internal solr API endpoints.""" diff --git a/search-api/tests/unit/api/internal/solr/test_backup.py b/search-api/tests/unit/api/internal/solr/test_backup.py new file mode 100644 index 00000000..07da7638 --- /dev/null +++ b/search-api/tests/unit/api/internal/solr/test_backup.py @@ -0,0 +1,91 @@ +# Copyright © 2023 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Test-Suite to ensure that the solr backup endpoint works as expected.""" +from http import HTTPStatus + +import pytest +import requests_mock + +from search_api.services.authz import SYSTEM_ROLE, STAFF_ROLE, PUBLIC_USER + +from tests import integration_solr +from tests.unit.services.utils import create_header + + +@pytest.mark.parametrize('test_name,command', [ + ('test_backup', 'backup'), + ('test_restore', 'restore'), + ('test_restorestatus', 'restorestatus'), + ('test_details', 'details') +]) +def test_replicate_solr_mocked(app, client, jwt, test_name: str, command: str): + """Assert that the backup endpoint sends the correct call to solr.""" + solr_url = app.config.get('SOLR_SVC_LEADER_URL') + f'/bor/replication?command={command}' + + with requests_mock.mock() as m: + m.post(solr_url) + + api_response = client.post(f'/api/v1/internal/solr/command', + json={'command': command}, + headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', + 'content-type': 'application/json'})) + + # check success + assert api_response.status_code == HTTPStatus.OK + + # check call to solr mock + assert m.called == True + assert m.call_count == 1 + + +@integration_solr +@pytest.mark.parametrize('test_name,command', [ + ('test_backup', 'backup'), + ('test_restore', 'restore'), + ('test_restorestatus', 'restorestatus'), + ('test_details', 'details') +]) +def test_replicate_solr_mocked(app, client, jwt, test_name: str, command: str): + """Assert that the backup endpoint is successful.""" + api_response = client.post(f'/api/v1/internal/solr/command', + 
json={'command': command}, + headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', + 'content-type': 'application/json'})) + + # check success + assert api_response.status_code == HTTPStatus.OK + + +@pytest.mark.parametrize('test_name,payload', [ + ('missing_command', {'bla': 2}), + ('invalid_command', {'command': 'invalid'}), +]) +def test_backup_solr_invalid_data(app, session, client, jwt, test_name, payload): + """Assert that error is returned if payload is invalid.""" + api_response = client.post(f'/api/v1/internal/solr/command', + json=payload, + headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', + 'content-type': 'application/json'})) + assert api_response.status_code == HTTPStatus.BAD_REQUEST + + +def test_backup_solr_unauthorized(client, jwt): + """Assert that error is returned if unauthorized.""" + for role in [STAFF_ROLE, PUBLIC_USER]: + api_response = client.post(f'/api/v1/internal/solr/command', + data={'command': 'command'}, + headers=create_header(jwt, [role], **{'Accept-Version': 'v1', + 'content-type': 'application/json'})) + # check + assert api_response.status_code == HTTPStatus.UNAUTHORIZED diff --git a/search-api/tests/unit/api/internal/solr/test_import.py b/search-api/tests/unit/api/internal/solr/test_import.py new file mode 100644 index 00000000..dc7f0fd2 --- /dev/null +++ b/search-api/tests/unit/api/internal/solr/test_import.py @@ -0,0 +1,131 @@ +# Copyright © 2023 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Test-Suite to ensure that the solr doc import enpoint works as expected.""" +import time +from dataclasses import asdict +from http import HTTPStatus + +import pytest +import requests_mock + +from search_api.services import business_solr +from search_api.services.authz import SYSTEM_ROLE + +from tests import integration_solr +from tests.unit.services.utils import create_header +from tests.unit.utils import SOLR_TEST_DOCS + + +@pytest.mark.parametrize('test_name,docs', [ + ('single', [SOLR_TEST_DOCS[0]]), + ('multiple', SOLR_TEST_DOCS), +]) +def test_import_solr_mocked(app, session, client, jwt, test_name, docs): + """Assert that update operation sends correct payload to solr.""" + solr_url = app.config.get('SOLR_SVC_BUS_LEADER_URL') + '/business/update?commit=true&overwrite=true&wt=json' + docs_json = [asdict(x) for x in docs] + with requests_mock.mock() as m: + m.post(solr_url) + api_response = client.put(f'/api/v1/internal/solr/import', + json={'businesses': docs_json}, + headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', + 'content-type': 'application/json'})) + + # check success + assert api_response.status_code == HTTPStatus.CREATED + + # check call to solr was correct + assert m.called == True + assert m.call_count == 1 # batch updated all docs + assert solr_url in m.request_history[0].url + + expected = [] + for doc in docs_json: + update_doc = {**doc} + if parties := update_doc.get('parties'): + update_doc['parties'] = {'set': parties} + expected.append(update_doc) + + assert m.request_history[0].json() == expected + + +@integration_solr +def test_update_solr(session, client, jwt): + """Assert that the import operation is successful.""" + # setup -- start with no docs + business_solr.delete_all_docs() + # import + docs_json = [asdict(x) for x in SOLR_TEST_DOCS] + api_response = client.put(f'/api/v1/internal/solr/import', + 
json={'businesses': docs_json}, + headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', + 'content-type': 'application/json'})) + # check + assert api_response.status_code == HTTPStatus.CREATED + + # check solr for updated records + time.sleep(2) # wait for solr to register update + for entity in SOLR_TEST_DOCS: + search_response = business_solr.query(payload={'query': f'id:{entity.id}', 'fields': '*'}) + assert search_response['response'] + assert search_response['response']['docs'] + assert len(search_response['response']['docs']) == 1 + + # do partial import + identifier = SOLR_TEST_DOCS[3].identifier + party_1_name = 'Test person si 1' + party_2_name = 'Test person si 2' + docs_json = [{ + 'id': identifier, + 'parties': { + 'add': [ + { + 'id': f'{identifier}_12457', + 'partyName': party_1_name, + 'partyRoles': ['significant individual'], + 'partyType': 'person' + }, + { + 'id': f'{identifier}_124590', + 'partyName': party_2_name, + 'partyRoles': ['significant individual'], + 'partyType': 'person' + } + ] + } + }] + api_response = client.put(f'/api/v1/internal/solr/import', + json={'type': 'partial', 'businesses': docs_json}, + headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', + 'content-type': 'application/json'})) + time.sleep(2) # wait for solr to register update + search_response = business_solr.query(payload={'query': f'id:{identifier}', 'fields': '*, [child]'}) + assert search_response['response'] + assert search_response['response']['docs'] + assert len(search_response['response']['docs']) == 1 + assert len(search_response['response']['docs'][0]['parties']) == 2 + assert search_response['response']['docs'][0]['parties'][0]['partyName'] == party_1_name + assert search_response['response']['docs'][0]['parties'][1]['partyName'] == party_2_name + + + +def test_update_solr_unauthorized(client, jwt): + """Assert that error is returned if unauthorized.""" + docs_json = [asdict(x) for x in SOLR_TEST_DOCS] + api_response = 
client.put(f'/api/v1/internal/solr/import', + json={'entities': docs_json}, + headers=create_header(jwt, [], **{'Accept-Version': 'v1', + 'content-type': 'application/json'})) + # check + assert api_response.status_code == HTTPStatus.UNAUTHORIZED diff --git a/search-api/tests/unit/api/internal/solr/update/__init__.py b/search-api/tests/unit/api/internal/solr/update/__init__.py new file mode 100644 index 00000000..6dc4a64e --- /dev/null +++ b/search-api/tests/unit/api/internal/solr/update/__init__.py @@ -0,0 +1,30 @@ +# Copyright © 2023 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Test-Suite for the internal solr update API endpoints.""" +from search_api.enums import SolrDocEventStatus, SolrDocEventType +from search_api.models import SolrDoc +from search_api.services.business_solr.doc_models import BusinessDoc + + +def check_update_recorded(identifier: str, status=SolrDocEventStatus.PENDING): + """Assert the given identifier was recorded for an update.""" + solr_doc = SolrDoc.find_most_recent_by_identifier(identifier) + assert solr_doc + assert solr_doc.identifier == identifier + assert BusinessDoc(**solr_doc.doc).id == identifier + assert solr_doc._submitter_id is not None + doc_events = solr_doc.solr_doc_events.all() + assert len(doc_events) == 1 + assert doc_events[0].event_status == status + assert doc_events[0].event_type == SolrDocEventType.UPDATE diff --git a/search-api/tests/unit/api/internal/solr/update/test_resync_solr.py b/search-api/tests/unit/api/internal/solr/update/test_resync_solr.py new file mode 100644 index 00000000..b824b91c --- /dev/null +++ b/search-api/tests/unit/api/internal/solr/update/test_resync_solr.py @@ -0,0 +1,178 @@ +# Copyright © 2023 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Test-Suite to ensure that the solr business update endpoints/functions work as expected.""" +import time +from copy import deepcopy +from dataclasses import asdict +from datetime import datetime, timedelta +from http import HTTPStatus + +import pytest +import requests_mock + +from search_api.enums import SolrDocEventStatus, SolrDocEventType +from search_api.models import SolrDoc, SolrDocEvent +from search_api.services import business_solr +from search_api.services.authz import SYSTEM_ROLE, STAFF_ROLE, PUBLIC_USER +from search_api.services.business_solr.doc_models import BusinessDoc + +from tests import integration_solr +from tests.unit.services.utils import create_header +from tests.unit.utils import SOLR_TEST_DOCS + + +def prep_resync(businesses: list[BusinessDoc]) -> list[tuple[BusinessDoc, SolrDoc, SolrDoc]]: + """Setup resync state.""" + setup_info = [] + for orig_bus in businesses: + # set one record to find and one record to miss (older / current version) + business = deepcopy(orig_bus) + business.name = f'{business.id} test_update_business_in_solr' + solr_doc = SolrDoc(doc=asdict(business), identifier=business.id).save() + SolrDocEvent(solr_doc_id=solr_doc.id, event_status=SolrDocEventStatus.COMPLETE, event_type=SolrDocEventType.UPDATE).save() + entity_old = deepcopy(business) + entity_old.name = f'{business.id} test_should_not_have_updated' + solr_doc_old = SolrDoc(doc=asdict(entity_old), identifier=entity_old.id).save() + solr_doc_old.submission_date = datetime.utcnow() - timedelta(minutes=10) + solr_doc_old.save() + SolrDocEvent(solr_doc_id=solr_doc_old.id, event_status=SolrDocEventStatus.COMPLETE, event_type=SolrDocEventType.UPDATE).save() + setup_info.append((business, solr_doc, solr_doc_old)) + + return setup_info + + +@pytest.mark.parametrize('test_name,payload,businesses', [ + ('resync_minutes_single', {'minutesOffset': 5}, [SOLR_TEST_DOCS[0]]), + ('resync_minutes_multi', {'minutesOffset': 5}, [SOLR_TEST_DOCS[0], SOLR_TEST_DOCS[1]]), + 
('resync_minutes_mix', {'minutesOffset': 5}, [SOLR_TEST_DOCS[0], SOLR_TEST_DOCS[1], SOLR_TEST_DOCS[2]]), + ('resync_minutes_nothing_to_do', {'minutesOffset': 5}, []), + ('resync_identifiers_single', {'identifiers': []}, [SOLR_TEST_DOCS[0]]), + ('resync_identifiers_multi', {'identifiers': []}, [SOLR_TEST_DOCS[0], SOLR_TEST_DOCS[1]]), + ('resync_identifiers_mix', {'identifiers': []}, [SOLR_TEST_DOCS[0], SOLR_TEST_DOCS[1], SOLR_TEST_DOCS[2]]), +]) +def test_resync_solr_mocked(app, session, client, jwt, test_name, payload: dict, businesses: list[BusinessDoc]): + """Assert that resync operation sends correct payload to solr.""" + solr_url = app.config.get('SOLR_SVC_BUS_LEADER_URL') + '/business/update?commit=true&overwrite=true&wt=json' + if 'identifiers' in payload: + payload['identifiers'] = [x.id for x in businesses] + + setup_info = prep_resync(businesses) + + with requests_mock.mock() as m: + m.post(solr_url) + + api_response = client.post(f'/api/v1/internal/solr/update/resync', + json=payload, + headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', + 'content-type': 'application/json'})) + + # check success + assert api_response.status_code == HTTPStatus.CREATED + + if not businesses: + # should not have resynced anything since nothing to update + assert m.called == False + assert m.call_count == 0 + else: + # check call to solr mock + assert m.called == True + assert m.call_count == 1 # batch call for all entities + + for info in setup_info: + business = info[0] + solr_doc = info[1] + solr_doc_old = info[2] + doc_events = solr_doc.solr_doc_events.all() + assert len(doc_events) == 2 + for event in doc_events: + assert event.event_status == SolrDocEventStatus.COMPLETE + assert event.event_type in [SolrDocEventType.RESYNC, SolrDocEventType.UPDATE] + # did not update the older record + assert len(solr_doc_old.solr_doc_events.all()) == 1 + + assert solr_url in m.request_history[0].url + + business_in_payload = False + for payload_business in 
m.request_history[0].json(): + # this info was sent as a payload + business_after_set_conversion = asdict(business) + for key in ['parties']: + if key_value := business_after_set_conversion.get(key): + business_after_set_conversion[key] = {'set': key_value} + + if payload_business == business_after_set_conversion: + business_in_payload = True + break + assert business_in_payload + + +@integration_solr +@pytest.mark.parametrize('test_name,payload,businesses', [ + ('resync_minutes', {'minutesOffset': 5}, [SOLR_TEST_DOCS[0], SOLR_TEST_DOCS[1], SOLR_TEST_DOCS[2]]), + ('resync_identifiers', {'identifiers': []}, [SOLR_TEST_DOCS[0], SOLR_TEST_DOCS[1], SOLR_TEST_DOCS[2]]), +]) +def test_resync_solr(session, client, jwt, test_name, payload: dict, businesses: list[BusinessDoc]): + """Assert that the resync update operation is successful.""" + if 'identifiers' in payload: + payload['identifiers'] = [x.id for x in businesses] + + # remove any existing solr docs + business_solr.delete_all_docs() + + setup_info = prep_resync(businesses) + api_response = client.post(f'/api/v1/internal/solr/update/resync', + json=payload, + headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', + 'content-type': 'application/json'})) + # check + assert api_response.status_code == HTTPStatus.CREATED + + for business, solr_doc, solr_doc_old in setup_info: + doc_events = solr_doc.solr_doc_events.all() + assert len(doc_events) == 2 + for event in doc_events: + assert event.event_status == SolrDocEventStatus.COMPLETE + assert event.event_type in [SolrDocEventType.RESYNC, SolrDocEventType.UPDATE] + # did not update the older record + assert len(solr_doc_old.solr_doc_events.all()) == 1 + + time.sleep(2) # wait for solr to register update + search_response = business_solr.query(payload={'query': f'id:{business.id}', 'fields': '*'}) + assert search_response['response']['numFound'] == 1 + assert search_response['response']['docs'][0]['name'] == business.name + + 
+@pytest.mark.parametrize('test_name,payload', [ + ('missing_required_field', {'bla': 2}), + ('invalid_minute', {'minutesOffset': 'invalid'}), +]) +def test_resync_solr_invalid_data(app, session, client, jwt, test_name, payload): + """Assert that error is returned if data missing.""" + + api_response = client.post(f'/api/v1/internal/solr/update/resync', + json=payload, + headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', + 'content-type': 'application/json'})) + assert api_response.status_code == HTTPStatus.BAD_REQUEST + + +def test_update_solr_unauthorized(client, jwt): + """Assert that error is returned if unauthorized.""" + for role in [STAFF_ROLE, PUBLIC_USER]: + api_response = client.post(f'/api/v1/internal/solr/update/resync', + data={'identifiers': ['BC1234567']}, + headers=create_header(jwt, [role], **{'Accept-Version': 'v1', + 'content-type': 'application/json'})) + # check + assert api_response.status_code == HTTPStatus.UNAUTHORIZED diff --git a/search-api/tests/unit/api/internal/solr/update/test_update_solr.py b/search-api/tests/unit/api/internal/solr/update/test_update_solr.py new file mode 100644 index 00000000..74c79f55 --- /dev/null +++ b/search-api/tests/unit/api/internal/solr/update/test_update_solr.py @@ -0,0 +1,212 @@ +# Copyright © 2023 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Test-Suite to ensure that the solr doc update enpoint works as expected.""" +import json +import time +from copy import deepcopy +from http import HTTPStatus + +import pytest +import requests_mock + +from search_api.enums import SolrDocEventStatus +from search_api.services import business_solr +from search_api.services.authz import SYSTEM_ROLE + +from tests import integration_solr +from tests.unit.services.utils import create_header +from tests.unit.utils import (SOLR_UPDATE_REQUEST_TEMPLATE_CORP as CORP_TEMPLATE, + SOLR_UPDATE_REQUEST_TEMPLATE_FIRM as FIRM_TEMPLATE) + +from . import check_update_recorded + +@pytest.mark.parametrize('test_name,request_json', [ + ('corp', CORP_TEMPLATE), + ('firm', FIRM_TEMPLATE) +]) +def test_update_solr_mocked(app, session, client, jwt, test_name, request_json): + """Assert that update operation sends correct payload to solr.""" + solr_url_update = app.config.get('SOLR_SVC_BUS_LEADER_URL') + '/business/update?commit=true&overwrite=true&wt=json' + + with requests_mock.mock() as m: + m.post(solr_url_update) + + api_response = client.put(f'/api/v1/internal/solr/update', + data=json.dumps(request_json), + headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', + 'content-type': 'application/json'})) + + # check success + assert api_response.status_code == HTTPStatus.ACCEPTED + # check business update + business_identifier = request_json['business']['identifier'] + check_update_recorded(business_identifier) + + # check did not call to solr mock (only updates the DB) + assert m.called == False + # call sync to update solr + api_response = client.get(f'/api/v1/internal/solr/update/sync', headers={'content-type': 'application/json'}) + # check success + assert api_response.status_code == HTTPStatus.OK + # check events were completed + business_identifier = request_json['business']['identifier'] + check_update_recorded(business_identifier, status=SolrDocEventStatus.COMPLETE) + # check call to solr was correct + assert 
m.called == True + assert m.call_count == 1 + + # verify record update + assert solr_url_update in m.request_history[0].url + if test_name == 'corp': + assert m.request_history[0].json() == [ + { + 'id': 'BC1233987', + 'identifier': 'BC1233987', + 'legalType': 'BEN', + 'name': 'Benefit test comp', + 'status': 'ACTIVE', + 'goodStanding': False, + 'bn': '987654321BC0001', + 'parties': None + } + ] + else: + assert m.request_history[0].json() == [ + { + 'id': 'FM1233334', + 'identifier': 'FM1233334', + 'legalType': 'SP', + 'name': 'Test ABC', + 'status': 'ACTIVE', + 'goodStanding': None, + 'bn': '123456789', + 'parties': { + 'set': [ + { + 'id': 'FM1233334_1', + 'parentBN': '123456789', + 'partyName': 'TEST ABC', + 'partyType': 'organization', + 'parentName': 'Test ABC', + 'partyRoles': ['proprietor'], + 'parentStatus': 'ACTIVE', + 'parentLegalType': 'SP', + 'parentIdentifier': 'FM1233334' + } + ] + } + } + ] + + +@integration_solr +@pytest.mark.parametrize('test_name,request_json', [ + ('corp', CORP_TEMPLATE), + ('firm', FIRM_TEMPLATE) +]) +def test_update_solr(session, client, jwt, test_name, request_json): + """Assert that update operation is successful.""" + # setup -- start with no docs + business_solr.delete_all_docs() + time.sleep(2) # wait for solr to register update + # update + api_response = client.put(f'/api/v1/internal/solr/update', + data=json.dumps(request_json), + headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', + 'content-type': 'application/json'})) + # check + assert api_response.status_code == HTTPStatus.ACCEPTED + business_identifier = request_json['business']['identifier'] + party_ids = [] + si_ids = [] + # check business update + check_update_recorded(business_identifier) + # verify update has NOT synced to solr yet + search_response = business_solr.query(payload={'query': f'id:{business_identifier}', 'fields': '*'}) + assert search_response['response'] + assert len(search_response['response']['docs']) == 0 + # call sync to 
update solr + api_response = client.get(f'/api/v1/internal/solr/update/sync', headers={'content-type': 'application/json'}) + # check success + assert api_response.status_code == HTTPStatus.OK + # check events were completed + check_update_recorded(business_identifier, status=SolrDocEventStatus.COMPLETE) + # check solr for updated records + time.sleep(2) # wait for solr to register update + # verify search returns updated records + search_response = business_solr.query(payload={'query': f'id:{business_identifier}', 'fields': '*'}) + assert search_response['response'] + assert len(search_response['response']['docs']) == 1 + + +@pytest.mark.parametrize('test_name,legal_type,identifier,expected', [ + ('test_bc_add_prfx', 'BC', '0123456', 'BC0123456'), + ('test_cc_add_prfx', 'CC', '1234567', 'BC1234567'), + ('test_ulc_add_prfx', 'ULC', '2345678', 'BC2345678'), + ('test_ben_add_prfx', 'BEN', '0000001', 'BC0000001'), + ('test_bc_prfx_given', 'BC', 'BC0123466', 'BC0123466'), + ('test_cc_prfx_given', 'CC', 'BC1234577', 'BC1234577'), + ('test_ulc_prfx_given', 'ULC', 'BC234588', 'BC234588'), + ('test_ben_add_prfx', 'BEN', 'BC0000002', 'BC0000002'), + ('test_wrong_type_no_prfx', 'S', '0000003', '0000003'), + ('test_wrong_type_prfx_given', 'S', 'S3456790', 'S3456790') +]) +def test_update_bc_class_adds_prefix(app, session, client, jwt, test_name, legal_type, identifier, expected): + """Assert prefixes are added to BC, ULC and CC identifiers and only when no prefix is given.""" + solr_url = app.config.get('SOLR_SVC_BUS_LEADER_URL') + '/bor/update?commit=true&overwrite=true&wt=json' + + with requests_mock.mock() as m: + m.post(solr_url) + + request_json = deepcopy(CORP_TEMPLATE) + request_json['business']['legalType'] = legal_type + request_json['business']['identifier'] = identifier + + api_response = client.put(f'/api/v1/internal/solr/update', + data=json.dumps(request_json), + headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', + 'content-type': 
'application/json'})) + # check + assert api_response.status_code == HTTPStatus.ACCEPTED + # check business update in model with altered identifier + check_update_recorded(expected) + + +@integration_solr +@pytest.mark.parametrize('test_name, party_type, good_standing', [ + ('invalid_goodStanding', 'organization', 'non-boolean'), + ('invalid_partyType', 'invalid type', 'true'), +]) +def test_update_business_in_solr_invalid_data(session, client, jwt, test_name, party_type, good_standing): + """Assert that error is returned.""" + request_json = deepcopy(FIRM_TEMPLATE) + request_json['parties'][0]['officer']['partyType'] = party_type + request_json['business']['goodStanding'] = good_standing + api_response = client.put(f'/api/v1/internal/solr/update', + data=json.dumps(request_json), + headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', + 'content-type': 'application/json'}) + ) + # check + assert api_response.status_code == HTTPStatus.BAD_REQUEST + + +def test_update_solr_unauthorized(client, jwt): + """Assert that error is returned if unauthorized.""" + api_response = client.put(f'/api/v1/internal/solr/update', + data=json.dumps(CORP_TEMPLATE), + headers=create_header(jwt, [], **{'Accept-Version': 'v1', + 'content-type': 'application/json'})) + # check + assert api_response.status_code == HTTPStatus.UNAUTHORIZED diff --git a/search-api/tests/unit/api/internal/test_update_solr.py b/search-api/tests/unit/api/internal/test_update_solr.py deleted file mode 100644 index 16603cb4..00000000 --- a/search-api/tests/unit/api/internal/test_update_solr.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright © 2022 Province of British Columbia -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Test-Suite to ensure that the solr business update endpoints/functions work as expected.""" -import json -import time -from copy import deepcopy -from dataclasses import asdict -from datetime import datetime, timedelta -from http import HTTPStatus - -import pytest - -from search_api.enums import SolrDocEventStatus, SolrDocEventType -from search_api.models import SolrDoc -from search_api.services.authz import SYSTEM_ROLE -from search_api.services.solr.solr_docs import BusinessDoc - -from tests.unit.utils import (SOLR_UPDATE_REQUEST_TEMPLATE_CORP as CORP_TEMPLATE, - SOLR_UPDATE_REQUEST_TEMPLATE_FIRM as FIRM_TEMPLATE) -from tests.unit.services.test_solr import SOLR_TEST_DOCS -from tests.unit.services.utils import create_header -from tests import integration_solr - - -@integration_solr -@pytest.mark.parametrize('test_name,template', [ - ('test_corp', CORP_TEMPLATE), - ('test_firm', FIRM_TEMPLATE) -]) -def test_update_business_in_solr(session, client, jwt, test_name, template): - """Assert that update operation is successful.""" - api_response = client.put(f'/api/v1/internal/solr/update', - data=json.dumps(template), - headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', - 'content-type': 'application/json'}) - ) - # check - assert api_response.status_code == HTTPStatus.OK - identifier = template['business']['identifier'] - solr_doc = SolrDoc.find_most_recent_by_identifier(identifier) - assert solr_doc.identifier == identifier - assert BusinessDoc(**solr_doc.doc).identifier == identifier - assert BusinessDoc(**solr_doc.doc).identifier_q == 
identifier - assert solr_doc._submitter_id is not None - doc_events = solr_doc.solr_doc_events.all() - assert len(doc_events) == 1 - assert doc_events[0].event_status == SolrDocEventStatus.COMPLETE - assert doc_events[0].event_type == SolrDocEventType.UPDATE - time.sleep(2) # wait for solr to register update - identifier = template['business']['identifier'] - search_response = client.get(f'/api/v1/businesses/search/facets?query=value:{identifier}', - headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', - 'content-type': 'application/json'}) - ) - assert search_response.status_code == HTTPStatus.OK - assert len(search_response.json['searchResults']['results']) == 1 - result = search_response.json['searchResults']['results'][0] - print(result) - if template['business']['legalType'] in ['SP', 'GP']: - assert result['name'] == template['business']['alternateNames'][0]['operatingName'] - else: - assert result['name'] == template['business']['legalName'] - -@integration_solr -@pytest.mark.parametrize('test_name, legal_name, good_standing, tax_id', [ - ('remove-tax-id', 'ABCD Prop', 'true', None), - ('update-legal-name', 'ABCDE Prop', 'true', '123456789'), - ('update-good-standing-string-false', 'ABCD Prop', 'false', '123456789'), - ('update-good-standing-string-true', 'ABCD Prop', 'true', '123456789'), - ('update-good-standing-boolean-false', 'ABCD Prop', False, '123456789'), - ('update-good-standing-boolean-true', 'ABCD Prop', True, '123456789'), - ('update-good-standing-none', 'ABCD Prop', None, '123456789'), -]) -def test_update_business_in_solr_with_varying_data(session, client, jwt, test_name, legal_name, good_standing, tax_id): - """Assert that update operation is successful.""" - request_json = deepcopy(CORP_TEMPLATE) - request_json['business']['legalName'] = legal_name - request_json['business']['goodStanding'] = good_standing - request_json['business']['taxId'] = tax_id - print(request_json) - api_response = 
client.put(f'/api/v1/internal/solr/update', - data=json.dumps(request_json), - headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', - 'content-type': 'application/json'}) - ) - # check - print(api_response.json) - assert api_response.status_code == HTTPStatus.OK - time.sleep(2) # wait for solr to register update - identifier = request_json['business']['identifier'] - search_response = client.get(f'/api/v1/businesses/search/facets?query=value:{identifier}', - headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', - 'content-type': 'application/json'}) - ) - assert search_response.status_code == HTTPStatus.OK - assert len(search_response.json['searchResults']['results']) == 1 - - -@integration_solr -@pytest.mark.parametrize('test_name,legal_type,identifier,expected', [ - ('test_bc_add_prfx', 'BC', '0123456', 'BC0123456'), - ('test_cc_add_prfx', 'CC', '1234567', 'BC1234567'), - ('test_ulc_add_prfx', 'ULC', '2345678', 'BC2345678'), - ('test_ben_add_prfx', 'BEN', '0000001', 'BC0000001'), - ('test_bc_prfx_given', 'BC', 'BC0123466', 'BC0123466'), - ('test_cc_prfx_given', 'CC', 'BC1234577', 'BC1234577'), - ('test_ulc_prfx_given', 'ULC', 'BC234588', 'BC234588'), - ('test_ben_add_prfx', 'BEN', 'BC0000002', 'BC0000002'), - ('test_wrong_type_no_prfx', 'S', '0000003', '0000003'), - ('test_wrong_type_prfx_given', 'S', 'S3456790', 'S3456790') -]) -def test_update_bc_class_adds_prefix(session, client, jwt, test_name, legal_type, identifier, expected): - """Assert prefixes are added to BC, ULC and CC identifiers and only when no prefix is given.""" - request_json = deepcopy(CORP_TEMPLATE) - request_json['business']['legalType'] = legal_type - request_json['business']['identifier'] = identifier - - api_response = client.put(f'/api/v1/internal/solr/update', - data=json.dumps(request_json), - headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', - 'content-type': 'application/json'})) - # check - assert api_response.status_code == 
HTTPStatus.OK - time.sleep(2) # wait for solr to register update - search_response = client.get(f'/api/v1/businesses/search/facets?query=value:{expected}::identifier:{expected}', - headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', - 'content-type': 'application/json'})) - - assert search_response.status_code == HTTPStatus.OK - assert len(search_response.json['searchResults']['results']) == 1 - assert search_response.json['searchResults']['results'][0]['identifier'] == expected - - -@integration_solr -def test_update_business_in_solr_missing_data(session, client, jwt): - """Assert that error is returned.""" - request_json = deepcopy(CORP_TEMPLATE) - del request_json['business']['identifier'] - api_response = client.put(f'/api/v1/internal/solr/update', - data=json.dumps(request_json), - headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', - 'content-type': 'application/json'}) - ) - # check - assert api_response.status_code == HTTPStatus.BAD_REQUEST - - -@integration_solr -@pytest.mark.parametrize('test_name, party_type, good_standing', [ - ('invalid_goodStanding', 'organization', 'non-boolean'), - ('invalid_partyType', 'invalid type', 'true'), -]) -def test_update_business_in_solr_invalid_data(session, client, jwt, test_name, party_type, good_standing): - """Assert that error is returned.""" - request_json = deepcopy(FIRM_TEMPLATE) - request_json['parties'][0]['officer']['partyType'] = party_type - request_json['business']['goodStanding'] = good_standing - api_response = client.put(f'/api/v1/internal/solr/update', - data=json.dumps(request_json), - headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', - 'content-type': 'application/json'}) - ) - # check - assert api_response.status_code == HTTPStatus.BAD_REQUEST - - -@integration_solr -def test_resync(session, client, jwt): - """Assert that the resync update operation is successful.""" - # prep data (one record to find and one record to miss) - business_doc = 
deepcopy(SOLR_TEST_DOCS[0]) - business_doc.name = 'test_update_business_in_solr' - solr_doc = SolrDoc(doc=asdict(business_doc), identifier=business_doc.identifier).save() - business_doc_old = deepcopy(SOLR_TEST_DOCS[1]) - business_doc_old.name = 'test_update_business_in_solr should_not_find' - solr_doc_old = SolrDoc(doc=asdict(business_doc_old), identifier=business_doc_old.identifier).save() - solr_doc_old.submission_date = datetime.utcnow() - timedelta(minutes=10) - solr_doc_old.save() - - api_response = client.post(f'/api/v1/internal/solr/update/resync', json={'minutesOffset': 5}) - # check - assert api_response.status_code == HTTPStatus.CREATED - - doc_events = solr_doc.solr_doc_events.all() - assert len(doc_events) == 1 - assert doc_events[0].event_status == SolrDocEventStatus.COMPLETE - assert doc_events[0].event_type == SolrDocEventType.RESYNC - # did not update the older record - assert len(solr_doc_old.solr_doc_events.all()) == 0 - - time.sleep(2) # wait for solr to register update - search_response = client.get(f'/api/v1/businesses/search/facets?query=value:{business_doc.identifier}', - headers=create_header(jwt, [SYSTEM_ROLE], **{'Accept-Version': 'v1', - 'content-type': 'application/json'}) - ) - assert search_response.status_code == HTTPStatus.OK - assert len(search_response.json['searchResults']['results']) == 1 - assert search_response.json['searchResults']['results'][0]['name'] == business_doc.name diff --git a/search-api/tests/unit/api/search/__init__.py b/search-api/tests/unit/api/search/__init__.py new file mode 100644 index 00000000..9f6463c5 --- /dev/null +++ b/search-api/tests/unit/api/search/__init__.py @@ -0,0 +1,14 @@ +# Copyright © 2024 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Test-Suite for the search endpoints.""" diff --git a/search-api/tests/unit/api/search/test_facets.py b/search-api/tests/unit/api/search/test_facets.py new file mode 100644 index 00000000..fec5d58f --- /dev/null +++ b/search-api/tests/unit/api/search/test_facets.py @@ -0,0 +1,398 @@ +# Copyright © 2024 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Test-Suite to ensure that the business search endpoints/functions work as expected.""" +import time +from http import HTTPStatus + +import pytest + +from search_api.services import business_solr +from search_api.services.business_solr.doc_fields import BusinessField + +from tests import integration_solr +from tests.unit.utils import SOLR_TEST_DOCS + +from .util import format_param + + +@pytest.mark.parametrize('test_name,query,categories', [ + ('test_basic', {'value': '123'}, {}), + ('test_filters', + {'value': 'test filters', BusinessField.NAME.value: 'name', BusinessField.IDENTIFIER.value: 'BC23', BusinessField.BN.value: '023'}, + {} + ), + ('test_categories', + {'value': 'test categories'}, + {BusinessField.STATE.value:['ACTIVE'], BusinessField.TYPE.value: ['BC', 'CP', 'SP']} + ), + ('test_all_combined', + { + 'value': 'test all combined', + BusinessField.NAME.value: 'name', + BusinessField.IDENTIFIER.value: 'BC23', + BusinessField.BN.value: '023' + }, + { + BusinessField.STATE.value:['ACTIVE'], + BusinessField.TYPE.value: ['BC', 'CP', 'SP'] + }) +]) +def test_facets_solr_mock(app, session, client, requests_mock, test_name, query, categories): + """Assert that the entities search call works returns successfully.""" + # setup mocks + requests_mock.post(f"{app.config.get('SOLR_SVC_BUS_LEADER_URL')}/business/query", json={'response': {'docs': [], 'numFound': 0, 'start': 0}}) + # format args + params = {'query': format_param(query)} + if categories: + params['categories'] = format_param(categories) + # call search + resp = client.get('/api/v1/businesses/search/facets', + headers={'Accept-Version': 'v1', 'content-type': 'application/json'}, + query_string=params) + # test + assert resp.status_code == HTTPStatus.OK + resp_json = resp.json + assert resp_json['facets'] == {'fields': {}} + assert resp_json['searchResults']['queryInfo']['rows'] == 10 + assert resp_json['searchResults']['queryInfo']['start'] == 0 + assert resp_json['searchResults']['results'] == [] + 
assert resp_json['searchResults']['totalResults'] == 0 + + +@integration_solr +@pytest.mark.parametrize('test_name,query,categories,expected', [ + ('test_basic_name', # NOTE: test setup checks for 'test_basic_name' on the first run + {'value': 'business one'}, + {}, + [{'bn': 'BN00012334', 'goodStanding': True, 'identifier': 'CP1234567', 'legalType': 'CP', 'name': 'business one 1', 'status': 'ACTIVE'}] + ), + ('test_basic_name_case', + {'value': 'BusIness ONE'}, + {}, + [{'bn': 'BN00012334', 'goodStanding': True, 'identifier': 'CP1234567', 'legalType': 'CP', 'name': 'business one 1', 'status': 'ACTIVE'}] + ), + ('test_basic_name_partial_1', + {'value': 'bus one'}, + {}, + [{'bn': 'BN00012334', 'goodStanding': True, 'identifier': 'CP1234567', 'legalType': 'CP', 'name': 'business one 1', 'status': 'ACTIVE'}] + ), + ('test_basic_name_partial_2', + {'value': 'siness on'}, + {}, + [{'bn': 'BN00012334', 'goodStanding': True, 'identifier': 'CP1234567', 'legalType': 'CP', 'name': 'business one 1', 'status': 'ACTIVE'}] + ), + ('test_basic_name_partial_3', + {'value': 'IVINE STERI'}, + {}, + [{'bn': 'BN00012388', 'identifier': 'BC0030016', 'legalType': 'BEN', 'name': 'DIVINE ÉBÉNISTERIE INC.', 'status': 'ACTIVE'}] + ), + ('test_basic_name_spellcheck', + {'value': 'basiness thrae'}, + {}, + [{'goodStanding': True, 'identifier': 'CP0034567', 'legalType': 'CP', 'name': 'business three 3', 'status': 'ACTIVE'}] + ), + ('test_basic_name_stem_1', + {'value': 'business eights'}, + {}, + [{'bn': '1255323221', 'identifier': 'BC0020047', 'legalType': 'BEN', 'name': 'business eight 8 special&match', 'status': 'ACTIVE'}] + ), + ('test_basic_name_stem_2', + {'value': 'businessing one'}, + {}, + [{'bn': 'BN00012334', 'goodStanding': True, 'identifier': 'CP1234567', 'legalType': 'CP', 'name': 'business one 1', 'status': 'ACTIVE'}] + ), + ('test_basic_name_stem_3', + {'value': 'businessed one'}, + {}, + [{'bn': 'BN00012334', 'goodStanding': True, 'identifier': 'CP1234567', 'legalType': 'CP', 
'name': 'business one 1', 'status': 'ACTIVE'}] + ), + ('test_basic_name_mix', + {'value': 'one business'}, + {}, + [{'bn': 'BN00012334', 'goodStanding': True, 'identifier': 'CP1234567', 'legalType': 'CP', 'name': 'business one 1', 'status': 'ACTIVE'}] + ), + ('test_basic_name_mix_partial', + {'value': 'STERI IVINE'}, + {}, + [{'bn': 'BN00012388', 'identifier': 'BC0030016', 'legalType': 'BEN', 'name': 'DIVINE ÉBÉNISTERIE INC.', 'status': 'ACTIVE'}] + ), + ('test_basic_name_mix_stem', + {'value': 'one businesses'}, + {}, + [{'bn': 'BN00012334', 'goodStanding': True, 'identifier': 'CP1234567', 'legalType': 'CP', 'name': 'business one 1', 'status': 'ACTIVE'}] + ), + ('test_basic_name_adv_chars', + {'value': 'b*s o?e "1"'}, + {}, + [{'bn': 'BN00012334', 'goodStanding': True, 'identifier': 'CP1234567', 'legalType': 'CP', 'name': 'business one 1', 'status': 'ACTIVE'}] + ), + ('test_basic_name_spec_char', + {'value': 'b!u(si)ness fou}l{rt-een ~`@#$%^-_=[]|\\;:\'",<>./'}, + {}, + [{'bn': '123456776BC0001', 'identifier': 'BC0030014', 'legalType': 'BEN', 'name': 'b!u(si)ness fou}l{rt-een ~`@#$%^-_=[]|\\;:\'",<>./', 'status': 'ACTIVE'}] + ), + ('test_basic_name_and_and', + {'value': 'special and match'}, + {}, + [{'bn': '242217', 'identifier': 'BC0000067', 'legalType': 'BEN', 'name': 'business six 6 special and match', 'status': 'ACTIVE'}, + {'bn': '124221', 'identifier': 'BC0000007', 'legalType': 'BEN', 'name': 'business seven 7 special & match', 'status': 'ACTIVE'}, + {'bn': '1255323221', 'identifier': 'BC0020047', 'legalType': 'BEN', 'name': 'business eight 8 special&match', 'status': 'ACTIVE'}, + {'bn': '123', 'identifier': 'FM1000028', 'legalType': 'SP', 'name': 'firm nine 9 special + match', 'status': 'ACTIVE'}, + {'identifier': 'FM1001118', 'legalType': 'GP', 'name': 'firm ten 10 special+match', 'status': 'ACTIVE'}] + ), + ('test_basic_name_and_&_1', + {'value': 'special & match'}, + {}, + [{'bn': '242217', 'identifier': 'BC0000067', 'legalType': 'BEN', 'name': 
'business six 6 special and match', 'status': 'ACTIVE'}, + {'bn': '124221', 'identifier': 'BC0000007', 'legalType': 'BEN', 'name': 'business seven 7 special & match', 'status': 'ACTIVE'}, + {'bn': '1255323221', 'identifier': 'BC0020047', 'legalType': 'BEN', 'name': 'business eight 8 special&match', 'status': 'ACTIVE'}, + {'bn': '123', 'identifier': 'FM1000028', 'legalType': 'SP', 'name': 'firm nine 9 special + match', 'status': 'ACTIVE'}, + {'identifier': 'FM1001118', 'legalType': 'GP', 'name': 'firm ten 10 special+match', 'status': 'ACTIVE'}] + ), + ('test_basic_name_and_&_2', + {'value': 'special&match'}, + {}, + [{'bn': '242217', 'identifier': 'BC0000067', 'legalType': 'BEN', 'name': 'business six 6 special and match', 'status': 'ACTIVE'}, + {'bn': '124221', 'identifier': 'BC0000007', 'legalType': 'BEN', 'name': 'business seven 7 special & match', 'status': 'ACTIVE'}, + {'bn': '1255323221', 'identifier': 'BC0020047', 'legalType': 'BEN', 'name': 'business eight 8 special&match', 'status': 'ACTIVE'}, + {'bn': '123', 'identifier': 'FM1000028', 'legalType': 'SP', 'name': 'firm nine 9 special + match', 'status': 'ACTIVE'}, + {'identifier': 'FM1001118', 'legalType': 'GP', 'name': 'firm ten 10 special+match', 'status': 'ACTIVE'}] + ), + ('test_basic_name_and_+_1', + {'value': 'special + match'}, + {}, + [{'bn': '242217', 'identifier': 'BC0000067', 'legalType': 'BEN', 'name': 'business six 6 special and match', 'status': 'ACTIVE'}, + {'bn': '124221', 'identifier': 'BC0000007', 'legalType': 'BEN', 'name': 'business seven 7 special & match', 'status': 'ACTIVE'}, + {'bn': '1255323221', 'identifier': 'BC0020047', 'legalType': 'BEN', 'name': 'business eight 8 special&match', 'status': 'ACTIVE'}, + {'bn': '123', 'identifier': 'FM1000028', 'legalType': 'SP', 'name': 'firm nine 9 special + match', 'status': 'ACTIVE'}, + {'identifier': 'FM1001118', 'legalType': 'GP', 'name': 'firm ten 10 special+match', 'status': 'ACTIVE'}] + ), + ('test_basic_name_and_+_2', + {'value': 
'special+match'}, + {}, + [{'bn': '242217', 'identifier': 'BC0000067', 'legalType': 'BEN', 'name': 'business six 6 special and match', 'status': 'ACTIVE'}, + {'bn': '124221', 'identifier': 'BC0000007', 'legalType': 'BEN', 'name': 'business seven 7 special & match', 'status': 'ACTIVE'}, + {'bn': '1255323221', 'identifier': 'BC0020047', 'legalType': 'BEN', 'name': 'business eight 8 special&match', 'status': 'ACTIVE'}, + {'bn': '123', 'identifier': 'FM1000028', 'legalType': 'SP', 'name': 'firm nine 9 special + match', 'status': 'ACTIVE'}, + {'identifier': 'FM1001118', 'legalType': 'GP', 'name': 'firm ten 10 special+match', 'status': 'ACTIVE'}] + ), + ('test_basic_name_._1', + {'value': 'firm eleven y.z.'}, + {}, + [{'identifier': 'FM0004018', 'legalType': 'GP', 'name': 'firm eleven 11 periods y.z. xk', 'status': 'ACTIVE'}] + ), + ('test_basic_name_._2', + {'value': 'firm eleven yz'}, + {}, + [{'identifier': 'FM0004018', 'legalType': 'GP', 'name': 'firm eleven 11 periods y.z. xk', 'status': 'ACTIVE'}] + ), + ('test_basic_name_._3', + {'value': 'firm eleven x.k.'}, + {}, + [{'identifier': 'FM0004018', 'legalType': 'GP', 'name': 'firm eleven 11 periods y.z. xk', 'status': 'ACTIVE'}] + ), + ('test_basic_name_._4', + {'value': 'firm eleven xk'}, + {}, + [{'identifier': 'FM0004018', 'legalType': 'GP', 'name': 'firm eleven 11 periods y.z. 
xk', 'status': 'ACTIVE'}] + ), + ('test_basic_name_-_1', + {'value': 'special - match'}, + {}, + [{'bn': '123456786BC0001', 'identifier': 'BC0030024', 'legalType': 'BEN', 'name': 'business thirteen 13 special - match', 'status': 'ACTIVE'}, + {'bn': '123456785BC0001', 'identifier': 'BC0030023', 'legalType': 'BEN', 'name': 'business twelve 12 special-match', 'status': 'ACTIVE'}] + ), + ('test_basic_name_-_2', + {'value': 'special-match'}, + {}, + [{'bn': '123456786BC0001', 'identifier': 'BC0030024', 'legalType': 'BEN', 'name': 'business thirteen 13 special - match', 'status': 'ACTIVE'}, + {'bn': '123456785BC0001', 'identifier': 'BC0030023', 'legalType': 'BEN', 'name': 'business twelve 12 special-match', 'status': 'ACTIVE'}] + ), + ('test_basic_identifier', + {'value': 'BC0004567'}, + {}, + [{'bn': '00987766800988', 'goodStanding': False, 'identifier': 'BC0004567', 'legalType': 'BEN', 'name': 'business four 4', 'status': 'ACTIVE'}] + ), + ('test_basic_identifier_partial', + {'value': 'BC00045'}, + {}, + [{'bn': '00987766800988', 'goodStanding': False, 'identifier': 'BC0004567', 'legalType': 'BEN', 'name': 'business four 4', 'status': 'ACTIVE'}] + ), + ('test_basic_identifier_no_spellcheck', + {'value': 'BC1004567'}, + {}, + [] + ), + ('test_basic_bn', + {'value': '00987766800988'}, + {}, + [{'bn': '00987766800988', 'goodStanding': False, 'identifier': 'BC0004567', 'legalType': 'BEN', 'name': 'business four 4', 'status': 'ACTIVE'}] + ), + ('test_basic_bn_partial', + {'value': '00987766'}, + {}, + [{'bn': '00987766800988', 'goodStanding': False, 'identifier': 'BC0004567', 'legalType': 'BEN', 'name': 'business four 4', 'status': 'ACTIVE'}] + ), + ('test_basic_bn_no_spellcheck', + {'value': '00987766800989'}, + {}, + [] + ), + ('test_basic_combined', + {'value': 'business BC0004567 00987766800988'}, + {}, + [{'bn': '00987766800988', 'goodStanding': False, 'identifier': 'BC0004567', 'legalType': 'BEN', 'name': 'business four 4', 'status': 'ACTIVE'}] + ), + 
('test_basic_no_match', {'value': 'zzz no match here qljrb'}, {},[]), + ('test_filters_name', + {'value': 'business', BusinessField.NAME.value: 'three'}, + {}, + [{'goodStanding': True, 'identifier': 'CP0034567', 'legalType': 'CP', 'name': 'business three 3', 'status': 'ACTIVE'}] + ), + ('test_filters_no_match', + {'value': 'business', BusinessField.NAME.value: 'threa'}, + {}, + [] + ), + ('test_categories_state', + {'value': 'business two'}, + {BusinessField.TYPE.value: 'CP'}, + [{'bn': '09876K', 'goodStanding': True, 'identifier': 'CP0234567', 'legalType': 'CP', 'name': 'business two 2', 'status': 'HISTORICAL'}] + ), + ('test_categories_no_match', + {'value': 'business two'}, + {BusinessField.TYPE.value: 'BEN'}, + [] + ), + ('test_all_combined', + { + 'value': 'business', + BusinessField.NAME.value: 'two', + BusinessField.IDENTIFIER.value: 'CP0234567', + BusinessField.BN.value: '09876K', + }, + { + BusinessField.STATE.value: 'HISTORICAL', + BusinessField.TYPE.value: 'CP' + }, + [{'bn': '09876K', 'goodStanding': True, 'identifier': 'CP0234567', 'legalType': 'CP', 'name': 'business two 2', 'status': 'HISTORICAL'}] + ) +]) +def test_facets(app, session, client, test_name, query, categories, expected): + """Assert that the business search call works returns successfully.""" + # test setup + if test_name == 'test_basic_name': + # setup solr data for test (only needed the first time) + business_solr.delete_all_docs() + time.sleep(1) + business_solr.create_or_replace_docs(SOLR_TEST_DOCS) + time.sleep(2) + + # format args + params = {'query': format_param(query)} + if categories: + params['categories'] = format_param(categories) + # call search + resp = client.get('/api/v1/businesses/search/facets', + headers={'Accept-Version': 'v1', 'content-type': 'application/json'}, + query_string=params) + # test + assert resp.status_code == HTTPStatus.OK + resp_json = resp.json + assert resp_json['facets'] + assert resp_json['searchResults'] + results = 
resp_json['searchResults']['results'] + for result in results: + del result['score'] + assert resp_json['searchResults']['totalResults'] == len(expected) + assert results == expected + + +@integration_solr +@pytest.mark.parametrize('test_name,query,expected', [ + ('test_si', # NOTE: test setup checks for 'test_si' on the first run + {'value': 'business five'}, + [{'bn': 'BN9000776557', 'identifier': 'BC0000567', 'legalType': 'BC', 'name': 'business five 5', 'parties': [{'partyName': 'test si', 'partyRoles': ['significant individual'], 'partyType': 'person', 'score': 0.0}], 'status': 'HISTORICAL'}] + ), + ('test_proprietor', + {'value': 'firm nine'}, + [{'bn': '123', 'identifier': 'FM1000028', 'legalType': 'SP', 'name': 'firm nine 9 special + match', 'parties': [{'partyName': 'person one', 'partyRoles': ['proprietor'], 'partyType': 'person', 'score': 0.0}], 'status': 'ACTIVE'}] + ), + ('test_partner', + {'value': 'firm ten'}, + [{'identifier': 'FM1001118', 'legalType': 'GP', 'name': 'firm ten 10 special+match', 'parties': [{'partyName': 'organization one', 'partyRoles': ['partner'], 'partyType': 'organization', 'score': 0.0}], 'status': 'ACTIVE'}] + ) +]) +def test_facets_with_parties(app, session, client, test_name, query, expected): + """Assert that the business search call works returns successfully.""" + # test setup + if test_name == 'test_si': + # setup solr data for test (only needed the first time) + business_solr.delete_all_docs() + time.sleep(1) + business_solr.create_or_replace_docs(SOLR_TEST_DOCS) + time.sleep(2) + + # format args + params = {'query': format_param(query), 'parties': 'true'} + # call search + resp = client.get('/api/v1/businesses/search/facets', + headers={'Accept-Version': 'v1', 'content-type': 'application/json'}, + query_string=params) + # test + assert resp.status_code == HTTPStatus.OK + resp_json = resp.json + assert resp_json['facets'] + assert resp_json['searchResults'] + results = resp_json['searchResults']['results'] + for result 
in results: + del result['score'] + assert result['parties'] + assert results == expected + + +def test_search_error(app, session, client, requests_mock): + """Assert that the business search call error handling works as expected.""" + # setup solr error mock + mocked_error_msg = 'mocked error' + mocked_status_code = HTTPStatus.BAD_GATEWAY + requests_mock.post(f"{app.config.get('SOLR_SVC_BUS_LEADER_URL')}/business/query", json={'error': {'msg': mocked_error_msg}}, status_code=mocked_status_code) + # call search + resp = client.get('/api/v1/businesses/search/facets?query=value:test', + headers={'Accept-Version': 'v1', 'content-type': 'application/json'}) + # test + assert resp.status_code == HTTPStatus.INTERNAL_SERVER_ERROR + resp_json = resp.json + assert resp_json.get('detail') == f'{mocked_error_msg}, {mocked_status_code}' + assert resp_json.get('message') == 'Solr service error while processing request.' + + +@pytest.mark.parametrize('test_name,query,errors', [ + ('test_no_query', {}, [{'missing param': "Expected url param 'query'."}]), + ('test_no_value', {'notValue': 'bla'}, [{'query param': "Expected url param 'query' to have 'value:'."}]) +]) +def test_search_bad_request(app, session, client, test_name, query, errors): + """Assert that the business search call validates the payload.""" + # format args + params = {'query': format_param(query)} + # call search + resp = client.get('/api/v1/businesses/search/facets', + headers={'Accept-Version': 'v1', 'content-type': 'application/json'}, + query_string=params) + # test + assert resp.status_code == HTTPStatus.BAD_REQUEST + resp_json = resp.json + assert resp_json.get('message') == 'Invalid args' + assert resp_json.get('details') == errors + diff --git a/search-api/tests/unit/api/search/test_parties.py b/search-api/tests/unit/api/search/test_parties.py new file mode 100644 index 00000000..5ade69aa --- /dev/null +++ b/search-api/tests/unit/api/search/test_parties.py @@ -0,0 +1,242 @@ +# Copyright © 2024 Province of 
British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Test-Suite to ensure that the parties search endpoints/functions work as expected.""" +import time +from http import HTTPStatus + +import pytest + +from search_api.services import business_solr +from search_api.services.business_solr.doc_fields import PartyField + +from tests import integration_solr +from tests.unit.utils import SOLR_TEST_DOCS + +from .util import format_param + + +@pytest.mark.parametrize('test_name,query,categories', [ + ('test_basic', {'value': '123'}, {PartyField.PARTY_ROLE.value: ['partner','proprietor']}), + ('test_filters', + {'value': 'test filters', PartyField.PARENT_NAME.value: 'name', PartyField.PARENT_IDENTIFIER.value: 'BC23', PartyField.PARENT_BN.value: '023'}, + {PartyField.PARTY_ROLE.value: ['partner','proprietor']} + ), + ('test_categories', + {'value': 'test categories'}, + {PartyField.PARENT_STATE.value:['ACTIVE'], PartyField.PARENT_TYPE.value: ['BC', 'CP', 'SP'], PartyField.PARTY_ROLE.value: ['partner','proprietor']} + ), + ('test_all_combined', + { + 'value': 'test all combined', + PartyField.PARENT_NAME.value: 'name', + PartyField.PARENT_IDENTIFIER.value: 'BC23', + PartyField.PARENT_BN.value: '023' + }, + { + PartyField.PARENT_STATE.value: ['ACTIVE'], + PartyField.PARENT_TYPE.value: ['BC', 'CP', 'SP'], + PartyField.PARTY_ROLE.value: ['partner','proprietor'] + }) +]) +def test_parties_solr_mock(app, session, client, requests_mock, test_name, query, categories): + 
"""Assert that the parties search call works returns successfully.""" + # setup mocks + requests_mock.post(f"{app.config.get('SOLR_SVC_BUS_LEADER_URL')}/business/query", json={'response': {'docs': [], 'numFound': 0, 'start': 0}}) + # format args + params = {'query': format_param(query)} + if categories: + params['categories'] = format_param(categories) + # call search + resp = client.get('/api/v1/businesses/search/parties', + headers={'Accept-Version': 'v1', 'content-type': 'application/json'}, + query_string=params) + # test + assert resp.status_code == HTTPStatus.OK + resp_json = resp.json + assert resp_json['facets'] == {'fields': {}} + assert resp_json['searchResults']['queryInfo']['rows'] == 10 + assert resp_json['searchResults']['queryInfo']['start'] == 0 + assert resp_json['searchResults']['results'] == [] + assert resp_json['searchResults']['totalResults'] == 0 + + +@integration_solr +@pytest.mark.parametrize('test_name,query,categories,expected', [ + ('test_basic_name', # NOTE: test setup checks for 'test_basic' on the first run + {'value': 'person one'}, + {PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [{'parentBN': '123', 'parentIdentifier': 'FM1000028', 'parentLegalType': 'SP', 'parentName': 'firm nine 9 special + match', 'parentStatus': 'ACTIVE', 'partyName': 'person one', 'partyRoles': ['proprietor'], 'partyType': 'person'}] + ), + ('test_basic_name_case', # NOTE: test setup checks for 'test_basic' on the first run + {'value': 'pErson ONE'}, + {PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [{'parentBN': '123', 'parentIdentifier': 'FM1000028', 'parentLegalType': 'SP', 'parentName': 'firm nine 9 special + match', 'parentStatus': 'ACTIVE', 'partyName': 'person one', 'partyRoles': ['proprietor'], 'partyType': 'person'}] + ), + ('test_basic_name_partial_1', + {'value': 'pers one'}, + {PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [{'parentBN': '123', 'parentIdentifier': 'FM1000028', 'parentLegalType': 'SP', 'parentName': 
'firm nine 9 special + match', 'parentStatus': 'ACTIVE', 'partyName': 'person one', 'partyRoles': ['proprietor'], 'partyType': 'person'}] + ), + ('test_basic_name_partial_2', + {'value': 'erson one'}, + {PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [{'parentBN': '123', 'parentIdentifier': 'FM1000028', 'parentLegalType': 'SP', 'parentName': 'firm nine 9 special + match', 'parentStatus': 'ACTIVE', 'partyName': 'person one', 'partyRoles': ['proprietor'], 'partyType': 'person'}] + ), + ('test_basic_name_partial_3', + {'value': 'erso ne'}, + {PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [{'parentBN': '123', 'parentIdentifier': 'FM1000028', 'parentLegalType': 'SP', 'parentName': 'firm nine 9 special + match', 'parentStatus': 'ACTIVE', 'partyName': 'person one', 'partyRoles': ['proprietor'], 'partyType': 'person'}] + ), + ('test_basic_name_spellcheck', + {'value': 'parson one'}, + {PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [{'parentBN': '123', 'parentIdentifier': 'FM1000028', 'parentLegalType': 'SP', 'parentName': 'firm nine 9 special + match', 'parentStatus': 'ACTIVE', 'partyName': 'person one', 'partyRoles': ['proprietor'], 'partyType': 'person'}] + ), + ('test_basic_name_mix', + {'value': 'one person'}, + {PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [{'parentBN': '123', 'parentIdentifier': 'FM1000028', 'parentLegalType': 'SP', 'parentName': 'firm nine 9 special + match', 'parentStatus': 'ACTIVE', 'partyName': 'person one', 'partyRoles': ['proprietor'], 'partyType': 'person'}] + ), + ('test_basic_name_mix_partial', + {'value': 'ne erson'}, + {PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [{'parentBN': '123', 'parentIdentifier': 'FM1000028', 'parentLegalType': 'SP', 'parentName': 'firm nine 9 special + match', 'parentStatus': 'ACTIVE', 'partyName': 'person one', 'partyRoles': ['proprietor'], 'partyType': 'person'}] + ), + ('test_basic_name_adv_chars', + {'value': 'p*n o?e "one"'}, + 
{PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [{'parentBN': '123', 'parentIdentifier': 'FM1000028', 'parentLegalType': 'SP', 'parentName': 'firm nine 9 special + match', 'parentStatus': 'ACTIVE', 'partyName': 'person one', 'partyRoles': ['proprietor'], 'partyType': 'person'}] + ), + ('test_basic_name_._1', + {'value': 'organization two y.z.'}, + {PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [{'parentIdentifier': 'FM0004018', 'parentLegalType': 'GP', 'parentName': 'firm eleven 11 periods y.z. xk', 'parentStatus': 'ACTIVE', 'partyName': 'organization two y.z. xk', 'partyRoles': ['partner'], 'partyType': 'organization'}] + ), + ('test_basic_name_._2', + {'value': 'organization two yz'}, + {PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [{'parentIdentifier': 'FM0004018', 'parentLegalType': 'GP', 'parentName': 'firm eleven 11 periods y.z. xk', 'parentStatus': 'ACTIVE', 'partyName': 'organization two y.z. xk', 'partyRoles': ['partner'], 'partyType': 'organization'}] + ), + ('test_basic_name_._3', + {'value': 'organization two x.k.'}, + {PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [{'parentIdentifier': 'FM0004018', 'parentLegalType': 'GP', 'parentName': 'firm eleven 11 periods y.z. xk', 'parentStatus': 'ACTIVE', 'partyName': 'organization two y.z. xk', 'partyRoles': ['partner'], 'partyType': 'organization'}] + ), + ('test_basic_name_._4', + {'value': 'organization two xk'}, + {PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [{'parentIdentifier': 'FM0004018', 'parentLegalType': 'GP', 'parentName': 'firm eleven 11 periods y.z. xk', 'parentStatus': 'ACTIVE', 'partyName': 'organization two y.z. 
xk', 'partyRoles': ['partner'], 'partyType': 'organization'}] + ), + ('test_basic_no_match', {'value': 'zzz no match here qljrb'}, {PartyField.PARTY_ROLE.value: ['partner','proprietor']},[]), + ('test_filters_name', + {'value': 'person', PartyField.PARENT_NAME.value: 'nine'}, + {PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [{'parentBN': '123', 'parentIdentifier': 'FM1000028', 'parentLegalType': 'SP', 'parentName': 'firm nine 9 special + match', 'parentStatus': 'ACTIVE', 'partyName': 'person one', 'partyRoles': ['proprietor'], 'partyType': 'person'}] + ), + ('test_filters_no_match', + {'value': 'person', PartyField.PARENT_NAME.value: 'three'}, + {PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [] + ), + ('test_categories_state', + {'value': 'person'}, + {PartyField.PARENT_TYPE.value: 'SP', PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [{'parentBN': '123', 'parentIdentifier': 'FM1000028', 'parentLegalType': 'SP', 'parentName': 'firm nine 9 special + match', 'parentStatus': 'ACTIVE', 'partyName': 'person one', 'partyRoles': ['proprietor'], 'partyType': 'person'}] + ), + ('test_categories_no_match', + {'value': 'person'}, + {PartyField.PARENT_TYPE.value: 'BEN', PartyField.PARTY_ROLE.value: ['partner','proprietor']}, + [] + ), + ('test_all_combined', + { + 'value': 'person', + PartyField.PARENT_NAME.value: 'nine', + PartyField.PARENT_IDENTIFIER.value: 'FM1000028', + PartyField.PARENT_BN.value: '123', + }, + { + PartyField.PARENT_STATE.value: 'ACTIVE', + PartyField.PARENT_TYPE.value: 'SP', + PartyField.PARTY_ROLE.value: ['partner','proprietor'] + }, + [{'parentBN': '123', 'parentIdentifier': 'FM1000028', 'parentLegalType': 'SP', 'parentName': 'firm nine 9 special + match', 'parentStatus': 'ACTIVE', 'partyName': 'person one', 'partyRoles': ['proprietor'], 'partyType': 'person'}] + ) +]) +def test_parties(app, session, client, test_name, query, categories, expected): + """Assert that the parties search call works returns successfully.""" 
+ # test setup + if test_name == 'test_basic_name': + # setup solr data for test (only needed the first time) + business_solr.delete_all_docs() + time.sleep(1) + business_solr.create_or_replace_docs(SOLR_TEST_DOCS) + time.sleep(2) + + # format args + params = {'query': format_param(query)} + if categories: + params['categories'] = format_param(categories) + # call search + resp = client.get('/api/v1/businesses/search/parties', + headers={'Accept-Version': 'v1', 'content-type': 'application/json'}, + query_string=params) + # test + assert resp.status_code == HTTPStatus.OK + resp_json = resp.json + assert resp_json['facets'] + assert resp_json['searchResults'] + results = resp_json['searchResults']['results'] + assert resp_json['searchResults']['totalResults'] == len(expected) + assert results == expected + + +def test_search_error(app, session, client, requests_mock): + """Assert that the parties search call error handling works as expected.""" + # setup solr error mock + mocked_error_msg = 'mocked error' + mocked_status_code = HTTPStatus.BAD_GATEWAY + requests_mock.post(f"{app.config.get('SOLR_SVC_BUS_LEADER_URL')}/business/query", json={'error': {'msg': mocked_error_msg}}, status_code=mocked_status_code) + # call search + resp = client.get('/api/v1/businesses/search/parties', + headers={'Accept-Version': 'v1', 'content-type': 'application/json'}, + query_string={'query': 'value:test', 'categories': 'partyRoles:partner,proprietor'}) + # test + assert resp.status_code == HTTPStatus.INTERNAL_SERVER_ERROR + resp_json = resp.json + assert resp_json.get('detail') == f'{mocked_error_msg}, {mocked_status_code}' + assert resp_json.get('message') == 'Solr service error while processing request.' 
+ + +@pytest.mark.parametrize('test_name,query,categories,errors', [ + ('test_no_query', {}, {}, [{'missing param': "Expected url param 'query'."}]), + ('test_no_value', {'notValue': 'bla'}, {}, [{'query param': "Expected url param 'query' to have 'value:'."}]), + ('test_no_partyRoles', {'value': 'test'}, {}, None), +]) +def test_search_bad_request(app, session, client, test_name, query, categories, errors): + """Assert that the parties search call validates the payload.""" + # format args + params = {'query': format_param(query)} + if categories: + params['categories'] = format_param(categories) + # call search + resp = client.get('/api/v1/businesses/search/parties', + headers={'Accept-Version': 'v1', 'content-type': 'application/json'}, + query_string=params) + # test + assert resp.status_code == HTTPStatus.BAD_REQUEST + resp_json = resp.json + assert resp_json.get('message') + assert resp_json.get('details') == errors + diff --git a/search-api/tests/unit/api/search/util.py b/search-api/tests/unit/api/search/util.py new file mode 100644 index 00000000..0003560f --- /dev/null +++ b/search-api/tests/unit/api/search/util.py @@ -0,0 +1,26 @@ +# Copyright © 2024 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Utils for the api search endpoint tests.""" + + +def format_param(param_dict: dict) -> str: + """Return the formatted param.""" + param = '' + for key, value in param_dict.items(): + if param: + param += '::' + if isinstance(value, list): + value = ','.join(value) + param += f'{key}:{value}' + return param diff --git a/search-api/tests/unit/api/test_meta.py b/search-api/tests/unit/api/test_meta.py index 75614d33..f2bfec99 100644 --- a/search-api/tests/unit/api/test_meta.py +++ b/search-api/tests/unit/api/test_meta.py @@ -18,8 +18,6 @@ """ from http import HTTPStatus -from registry_schemas import __version__ as registry_schemas_version - from search_api.utils.run_version import get_run_version def test_info(session, client, jwt): @@ -31,4 +29,3 @@ def test_info(session, client, jwt): # check assert rv.status_code == HTTPStatus.OK assert rv.json['API'] == f'search_api/{get_run_version()}' - assert rv.json['SCHEMAS'] == f'registry_schemas/{registry_schemas_version}' diff --git a/search-api/tests/unit/models/test_solr_doc.py b/search-api/tests/unit/models/test_solr_doc.py index 6d940152..9a0519dd 100644 --- a/search-api/tests/unit/models/test_solr_doc.py +++ b/search-api/tests/unit/models/test_solr_doc.py @@ -18,9 +18,9 @@ from datetime import datetime, timedelta from search_api.models import SolrDoc -from search_api.services.solr.solr_docs import BusinessDoc +from search_api.services.business_solr.doc_models import BusinessDoc -from tests.unit.services.test_solr import SOLR_TEST_DOCS +from tests.unit.utils import SOLR_TEST_DOCS def test_solr_doc(session): @@ -42,8 +42,8 @@ def test_find_most_recent_by_identifier(session): business_doc_3 = deepcopy(SOLR_TEST_DOCS[0]) business_doc_3.name += '3' - solr_doc_1 = SolrDoc(doc=asdict(business_doc_1), identifier=business_doc_1.identifier).save() - solr_doc_2 = SolrDoc(doc=asdict(business_doc_2), identifier=business_doc_2.identifier).save() + SolrDoc(doc=asdict(business_doc_1), identifier=business_doc_1.identifier).save() + 
SolrDoc(doc=asdict(business_doc_2), identifier=business_doc_2.identifier).save() solr_doc_3 = SolrDoc(doc=asdict(business_doc_3), identifier=business_doc_3.identifier).save() # test method diff --git a/search-api/tests/unit/services/test_solr/__init__.py b/search-api/tests/unit/services/test_solr/__init__.py deleted file mode 100644 index 31cf656f..00000000 --- a/search-api/tests/unit/services/test_solr/__init__.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright © 2022 Province of British Columbia -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Tests to assure the Solr Services.""" -from search_api.services.solr.solr_docs import BusinessDoc, PartyDoc -from search_api.services.solr.solr_fields import SolrField - - -def create_solr_doc(identifier, name, state, legal_type, bn=None, parties=None, goodStanding=None) -> BusinessDoc: - solr_parties = None - if parties: - solr_parties = [] - for party in parties: - party_doc = { - SolrField.PARENT_BN.value: bn, - SolrField.PARENT_NAME.value: name, - SolrField.PARENT_STATE.value: state, - SolrField.PARENT_TYPE.value: legal_type, - SolrField.PARTY_NAME.value: party[0], - SolrField.PARTY_ROLE.value: [party[1]], - SolrField.PARTY_TYPE.value: party[2], - } - solr_parties.append(PartyDoc(**party_doc)) - return BusinessDoc( - identifier=identifier, - legalType=legal_type, - name=name, - status=state, - goodStanding=goodStanding, - bn=bn, - parties=solr_parties - ) - - -SOLR_TEST_DOCS = [ - create_solr_doc('CP1234567', 'test 1234', 'ACTIVE', 'CP', 'BN00012334', None, True), - create_solr_doc('CP0234567', 'tester 1111', 'HISTORICAL', 'CP', '09876K', None, True), - create_solr_doc('CP0034567', 'tests 2222', 'ACTIVE', 'CP', None, None, True), - create_solr_doc('BC0004567', 'test 3333', 'ACTIVE', 'BEN', '00987766800988', None, False), - create_solr_doc('BC0000567', '4444 test', 'HISTORICAL', 'BC', 'BN9000776557'), - create_solr_doc('BC0000067', 'single', 'ACTIVE', 'BEN', '242217'), - create_solr_doc('BC0000007', 'lots of words in here', 'ACTIVE', 'BEN', '124221'), - create_solr_doc('BC0020047', 'NOt Case SENSitive', 'ACTIVE', 'BEN', '1255323221'), - create_solr_doc('FM1000028', 'sp firm', 'ACTIVE', 'SP', '123', [('person 1', 'proprietor', 'person')]), - create_solr_doc('FM1001118', 'gp firm', 'ACTIVE', 'GP', None, [('org 1', 'partner', 'organization')]), - create_solr_doc('FM0004018', 'gp firm multiple parties', 'ACTIVE', 'GP', None, [('test org partner', 'partner', 'organization'), ('test person partner', 'partner', 'person')]), - create_solr_doc('BC0030001', '01 solr 
special && char', 'ACTIVE', 'BEN', '123456789BC0001'), - create_solr_doc('BC0030002', '02 solr special || char', 'ACTIVE', 'BEN', '123456788BC0001'), - create_solr_doc('BC0030003', '03 solr special: char', 'ACTIVE', 'BEN', '123456787BC0001'), - create_solr_doc('BC0030004', '04 solr special + char', 'ACTIVE', 'BEN', '123456786BC0001'), - create_solr_doc('BC0030005', '05 solr special - char', 'ACTIVE', 'BEN', '123456785BC0001'), - create_solr_doc('BC0030006', '06 solr special ! char', 'ACTIVE', 'BEN', '123456784BC0001'), - create_solr_doc('BC0030007', '07 solr special \ char', 'ACTIVE', 'BEN', '123456783BC0001'), - create_solr_doc('BC0030008', '08 solr special (char)', 'ACTIVE', 'BEN', '123456782BC0001'), - create_solr_doc('BC0030009', '09 solr special " char"', 'ACTIVE', 'BEN', '123456781BC0001'), - create_solr_doc('BC0030010', '10 solr special ~ char', 'ACTIVE', 'BEN', '123456780BC0001'), - create_solr_doc('BC0030011', '11 solr special* char', 'ACTIVE', 'BEN', '123456779BC0001'), - create_solr_doc('BC0030012', '12 solr special? 
char', 'ACTIVE', 'BEN', '123456778BC0001'), - create_solr_doc('BC0030013', '13 solr special / char', 'ACTIVE', 'BEN', '123456777BC0001'), - create_solr_doc('BC0030014', 'many special =&{}^%`#|<>,.@$;_chars', 'ACTIVE', 'BEN', '123456776BC0001'), - create_solr_doc('BC0030015', 'special OR AND NOT operators', 'ACTIVE', 'BEN', '123456775BC0001'), - create_solr_doc('BC0030016', 'DIVINE ÉBÉNISTERIE INC.', 'ACTIVE', 'BEN', 'BN00012388'), - create_solr_doc('BC0030017', 'special and match 1', 'ACTIVE', 'BEN', '123456780BC0001'), - create_solr_doc('BC0030018', 'special + match 2', 'ACTIVE', 'BEN', '123456781BC0001'), - create_solr_doc('BC0030019', 'special+match 3', 'ACTIVE', 'BEN', '123456782BC0001'), - create_solr_doc('BC0030020', 'special & match 4', 'ACTIVE', 'BEN', '123456783BC0001'), - create_solr_doc('BC0030021', 'special&match 5', 'ACTIVE', 'BEN', '123456784BC0001'), - create_solr_doc('BC0030023', 'special-dash match 1', 'ACTIVE', 'BEN', '123456785BC0001'), - create_solr_doc('BC0030024', 'special - dash match 2', 'ACTIVE', 'BEN', '123456786BC0001'), - create_solr_doc('BC0030025', 'special dash match 3', 'ACTIVE', 'BEN', '123456787BC0001'), - create_solr_doc('BC0030026', 'special match nothing', 'ACTIVE', 'BEN', '123456788BC0001') -] diff --git a/search-api/tests/unit/services/test_solr/test_solr_basic.py b/search-api/tests/unit/services/test_solr/test_solr_basic.py deleted file mode 100644 index dcd75cd3..00000000 --- a/search-api/tests/unit/services/test_solr/test_solr_basic.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright © 2022 Province of British Columbia -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Test-Suite to ensure that the Solr Service is working as expected for updates/deletes/searches.""" -import time -from dataclasses import asdict -from http import HTTPStatus - -import pytest -from flask import Flask - -from search_api.services import search_solr -from search_api.services.solr import Solr -from search_api.services.solr.solr_fields import SolrField - -from tests import integration_solr - -from . import create_solr_doc - - -@pytest.mark.parametrize('test_name,identifier,state,name,legal_type,bn', [ - ('test-1', 'CP1', 'ACTIVE', 'BASIC TEST 1', 'CP', '12345'), -]) -def test_solr_doc(test_name, identifier, state, name, legal_type, bn): - """Assert that solr doc class works as expected.""" - new_doc = create_solr_doc(identifier, name, state, legal_type, bn) - assert new_doc - json = asdict(new_doc) - assert json - assert json.get(SolrField.IDENTIFIER.value) == identifier - assert json.get(SolrField.STATE.value) == state - assert json.get(SolrField.NAME.value) == name - assert json.get(SolrField.TYPE.value) == legal_type - assert json.get(SolrField.BN.value) == bn - - -@integration_solr -@pytest.mark.parametrize('test_name,identifier,state,name,legal_type,bn,good_standing, expected_good_standing', [ - ('test-with-good_standing=true', 'CP1234577', 'ACTIVE', 'BASIC TEST 1', 'CP', '12345','true', True), - ('test-with-good_standing=false', 'CP1234577', 'ACTIVE', 'BASIC TEST 2', 'CP', '12345','false', False), - ('test-without-good_standing', 'CP1234577', 'ACTIVE', 'BASIC TEST 3', 'CP', '12345', None, None), -]) -def test_solr_create_delete(app, 
test_name, identifier, state, name, legal_type, bn, good_standing, expected_good_standing): - """Assert that solr docs can be created/deleted.""" - search_solr.init_app(app) - search_solr.delete_all_docs() - # add new doc - new_doc = create_solr_doc(identifier, name, state, legal_type, bn, None, good_standing) - added = search_solr.create_or_replace_docs([new_doc]) - assert added.status_code == HTTPStatus.OK - time.sleep(2) # takes up to 1 second for solr to register update - # search new doc - params = {'q': f'{SolrField.IDENTIFIER_Q.value}:{identifier}', 'fl': search_solr.base_fields} - resp = search_solr.query(params, 0, 10) - docs = resp['response']['docs'] - assert len(docs) == 1 - assert docs[0][SolrField.IDENTIFIER.value] == identifier - assert docs[0][SolrField.BN.value] == bn - assert docs[0][SolrField.NAME.value] == name - assert docs[0][SolrField.STATE.value] == state - assert docs[0][SolrField.TYPE.value] == legal_type - assert docs[0].get(SolrField.GOOD_STANDING.value) == expected_good_standing - # delete doc - deleted = search_solr.delete_docs([identifier]) - assert deleted.status_code == HTTPStatus.OK - time.sleep(1) # takes up to 1 second for solr to register update - # test search returns nothing - params = {'q': f'{SolrField.IDENTIFIER_Q.value}:{identifier}', 'fl': search_solr.base_fields} - resp = search_solr.query(params, 0, 10) - docs = resp['response']['docs'] - assert len(docs) == 0 - - -@pytest.mark.parametrize('test_name,params,expected', [ - ('test-basic-basic', {'query': 'name', 'fields': [SolrField.NAME_Q.value], 'wild': []}, {'q': f'({SolrField.NAME_Q.value}:name)', 'fq': ''}), - ('test-basic-basic-wild', {'query': 'name', 'fields': [SolrField.NAME_Q.value], 'wild': [SolrField.NAME_Q.value]}, {'q': f'({SolrField.NAME_Q.value}:name*)', 'fq': ''}), - ('test-basic-multi', {'query': 'name', 'fields': [SolrField.NAME_Q.value, SolrField.NAME_STEM_AGRO.value], 'wild': []}, {'q': f'({SolrField.NAME_Q.value}:name OR 
{SolrField.NAME_STEM_AGRO.value}:name)', 'fq': ''}), - ('test-basic-multi-wild-1', {'query': 'name', 'fields': [SolrField.NAME_Q.value, SolrField.NAME_STEM_AGRO.value], 'wild': [SolrField.NAME_Q.value]}, {'q': f'({SolrField.NAME_Q.value}:name* OR {SolrField.NAME_STEM_AGRO.value}:name)', 'fq': ''}), - ('test-basic-multi-wild-2', {'query': 'name', 'fields': [SolrField.NAME_Q.value, SolrField.NAME_STEM_AGRO.value], 'wild': [SolrField.NAME_STEM_AGRO.value]}, {'q': f'({SolrField.NAME_Q.value}:name OR {SolrField.NAME_STEM_AGRO.value}:name*)', 'fq': ''}), - ('test-basic-multi-wild-3', {'query': 'name', 'fields': [SolrField.NAME_Q.value, SolrField.NAME_STEM_AGRO.value], 'wild': [SolrField.NAME_Q.value, SolrField.NAME_STEM_AGRO.value]}, {'q': f'({SolrField.NAME_Q.value}:name* OR {SolrField.NAME_STEM_AGRO.value}:name*)', 'fq': ''}), - ('test-multi-basic', {'query': 'name1 name2 name3', 'fields': [SolrField.NAME_Q.value], 'wild': []}, {'q': f'({SolrField.NAME_Q.value}:name1)', 'fq': f'({SolrField.NAME_Q.value}:name2) AND ({SolrField.NAME_Q.value}:name3)'}), - ('test-multi-basic-wild', {'query': 'name1 name2 name3', 'fields': [SolrField.NAME_Q.value], 'wild': [SolrField.NAME_Q.value]}, {'q': f'({SolrField.NAME_Q.value}:name1*)', 'fq': f'({SolrField.NAME_Q.value}:name2*) AND ({SolrField.NAME_Q.value}:name3*)'}), - ('test-multi-multi', {'query': 'name1 name2 name3', 'fields': [SolrField.NAME_Q.value, SolrField.NAME_STEM_AGRO.value], 'wild': []}, {'q': f'({SolrField.NAME_Q.value}:name1 OR {SolrField.NAME_STEM_AGRO.value}:name1)', 'fq': f'({SolrField.NAME_Q.value}:name2 OR {SolrField.NAME_STEM_AGRO.value}:name2) AND ({SolrField.NAME_Q.value}:name3 OR {SolrField.NAME_STEM_AGRO.value}:name3)'}), - ('test-multi-multi-wild-1', {'query': 'name1 name2 name3', 'fields': [SolrField.NAME_Q.value, SolrField.NAME_STEM_AGRO.value], 'wild': [SolrField.NAME_Q.value]}, {'q': f'({SolrField.NAME_Q.value}:name1* OR {SolrField.NAME_STEM_AGRO.value}:name1)', 'fq': f'({SolrField.NAME_Q.value}:name2* 
OR {SolrField.NAME_STEM_AGRO.value}:name2) AND ({SolrField.NAME_Q.value}:name3* OR {SolrField.NAME_STEM_AGRO.value}:name3)'}), - ('test-multi-multi-wild-2', {'query': 'name1 name2 name3', 'fields': [SolrField.NAME_Q.value, SolrField.NAME_STEM_AGRO.value], 'wild': [SolrField.NAME_STEM_AGRO.value]}, {'q': f'({SolrField.NAME_Q.value}:name1 OR {SolrField.NAME_STEM_AGRO.value}:name1*)', 'fq': f'({SolrField.NAME_Q.value}:name2 OR {SolrField.NAME_STEM_AGRO.value}:name2*) AND ({SolrField.NAME_Q.value}:name3 OR {SolrField.NAME_STEM_AGRO.value}:name3*)'}), - ('test-multi-multi-wild-3', {'query': 'name1 name2 name3', 'fields': [SolrField.NAME_Q.value, SolrField.NAME_STEM_AGRO.value], 'wild': [SolrField.NAME_Q.value, SolrField.NAME_STEM_AGRO.value]}, {'q': f'({SolrField.NAME_Q.value}:name1* OR {SolrField.NAME_STEM_AGRO.value}:name1*)', 'fq': f'({SolrField.NAME_Q.value}:name2* OR {SolrField.NAME_STEM_AGRO.value}:name2*) AND ({SolrField.NAME_Q.value}:name3* OR {SolrField.NAME_STEM_AGRO.value}:name3*)'}), - ('test-complex-1', {'query': 'name1 name2 name3', 'fields': [SolrField.NAME_Q.value, SolrField.NAME_STEM_AGRO.value, SolrField.IDENTIFIER_Q.value, SolrField.BN_Q.value], 'wild': [SolrField.IDENTIFIER_Q.value, SolrField.BN_Q.value]}, {'q': f'({SolrField.NAME_Q.value}:name1 OR {SolrField.NAME_STEM_AGRO.value}:name1 OR ({SolrField.IDENTIFIER_Q.value}:"1" AND {SolrField.IDENTIFIER_Q.value}:"NAME") OR {SolrField.BN_Q.value}:name1*)', 'fq': f'({SolrField.NAME_Q.value}:name2 OR {SolrField.NAME_STEM_AGRO.value}:name2 OR {SolrField.IDENTIFIER_Q.value}:name2* OR {SolrField.BN_Q.value}:name2*) AND ({SolrField.NAME_Q.value}:name3 OR {SolrField.NAME_STEM_AGRO.value}:name3 OR {SolrField.IDENTIFIER_Q.value}:name3* OR {SolrField.BN_Q.value}:name3*)'}), -]) -def test_build_split_query(test_name, params, expected): - """Assert that the build_split_query function works as expected.""" - split_query = Solr.build_split_query({'value': params['query']}, params['fields'], params['wild']) - assert 
split_query == expected - - -@pytest.mark.parametrize('test_name,query,names,expected', [ - ('test-1', '1234567', ['1234567 B.C. LTD.'], ['1234567 B.C. LTD.']), - ('test-2', 'my query', ['MY QUERY 1', 'THIS IS MY QUERY'], ['MY QUERY 1', 'THIS IS MY QUERY']), -]) -def test_highlight_names(test_name, query, names, expected): - """Assert the highlight names function works as expected.""" - highlighted_names = Solr.highlight_names(query.upper(), names) - assert highlighted_names == expected - - -@pytest.mark.parametrize('test_name,facet_data,expected', [ - ('test-1', - {'facets': {SolrField.TYPE.value: {'buckets': [{'val': 'BEN', 'count': 23}, {'val': 'CP', 'count': 10}, {'val': 'SP', 'count': 102}]}, SolrField.STATE.value: {'buckets': [{'val': 'ACTIVE', 'count': 23}, {'val': 'HISTORICAL', 'count': 10}]}}}, - {'fields': {SolrField.TYPE.value: [{'value': 'BEN', 'count': 23}, {'value': 'CP', 'count': 10}, {'value': 'SP', 'count': 102}], SolrField.STATE.value: [{'value': 'ACTIVE', 'count': 23}, {'value': 'HISTORICAL', 'count': 10}]}}), -]) -def test_parse_facets(test_name, facet_data, expected): - """Assert the parse facets function works as expected.""" - facet_info = Solr.parse_facets(facet_data) - assert facet_info == expected diff --git a/search-api/tests/unit/services/test_solr/test_solr_query.py b/search-api/tests/unit/services/test_solr/test_solr_query.py deleted file mode 100644 index c8a8b33f..00000000 --- a/search-api/tests/unit/services/test_solr/test_solr_query.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright © 2022 Province of British Columbia -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE_2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Test suite to ensure that the Solr Service is working as expected for all queries.""" -import time -from http import HTTPStatus - -import pytest -from flask import Flask - -from search_api.services import search_solr -from search_api.services.solr import Solr -from search_api.services.solr.solr_fields import SolrField - -from tests import integration_solr -from . import SOLR_TEST_DOCS - - -@integration_solr -@pytest.mark.parametrize('test_name,query,expected', [ - ('test_doesnt_match_identifier', 'CP00', []), - ('test_doesnt_match_bn', 'BN00012334', []), - ('test_name_exact', 'tests 2222', ['tests 2222']), - ('test_case', 'not case sensitive', ['NOt Case SENSitive']), - ('test_partial_1', 'tester', ['tester 1111']), - ('test_partial_2', 'tester 11', ['tester 1111']), - ('test_partial_3', 'lots of wor', ['lots of words in here']), - ('test_all_words_match', 'tests oops 2222', []), - ('test_stem_matches', 'test 2222', ['tests 2222']), - ('test_multiple_matches', 'test', ['test 1234', 'tester 1111', 'tests 2222', 'test 3333', '4444 test']), -]) -def test_solr_suggest_name(app, test_name, query, expected): - """Assert that solr suggest call works as expected.""" - # setup - search_solr.init_app(app) - search_solr.delete_all_docs() - search_solr.create_or_replace_docs(SOLR_TEST_DOCS) - time.sleep(1) # wait for solr to register update - # call suggester - suggestions = search_solr.suggest(query, 10, True) - assert len(suggestions) == len(expected) - for name in expected: - assert name.upper() in suggestions - - -@integration_solr 
-@pytest.mark.parametrize('test_name,query,query_field,base_fields,expected_field,expected', [ - ('test_identifier_1', 'CP00', SolrField.IDENTIFIER_Q.value, True, SolrField.IDENTIFIER.value, ['CP0034567']), - ('test_identifier_2', 'CP567', SolrField.IDENTIFIER_Q.value, True, SolrField.IDENTIFIER.value, ['CP0034567', 'CP1234567', 'CP0234567']), - ('test_bn', '0012334', SolrField.BN_Q.value, True, SolrField.BN.value, ['BN00012334']), - ('test_name_exact', 'tests 2222', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['tests 2222']), - ('test_case', 'not case sensitive', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['NOt Case SENSitive']), - ('test_partial_1', 'tester', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['tester 1111']), - ('test_partial_2', 'tester 11', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['tester 1111']), - ('test_partial_3', 'lots of wor', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['lots of words in here']), - ('test_partial_4', 'ots of ords', SolrField.NAME_SINGLE.value, True, SolrField.NAME.value, ['lots of words in here']), - ('test_all_words_match', 'tests oops 2222', SolrField.NAME_Q.value, True, SolrField.NAME.value, []), - ('test_multiple_matches', 'test 1', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['test 1234', 'tester 1111']), - ('test_parties_1', 'org', SolrField.PARTY_NAME_Q.value, False, SolrField.PARTY_NAME.value, ['org 1', 'test org partner']), - ('test_parties_2', 'person', SolrField.PARTY_NAME_Q.value, False, SolrField.PARTY_NAME.value, ['person 1', 'test person partner']), - ('test_parties_3', 'test Person', SolrField.PARTY_NAME_Q.value, False, SolrField.PARTY_NAME.value, ['test person partner']), - ('test_parties_4', 'test partner', SolrField.PARTY_NAME_Q.value, False, SolrField.PARTY_NAME.value, ['test org partner', 'test person partner']), - ('test_special_chars_name_&&', '01 special && char', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['01 solr special && char']), - 
('test_special_chars_name_||', '02 special || char', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['02 solr special || char']), - ('test_special_chars_name_:', '03 special: char', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['03 solr special: char']), - ('test_special_chars_name_+', '04 special + char', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['04 solr special + char']), - ('test_special_chars_name_-', '05 special - char', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['05 solr special - char']), - ('test_special_chars_name_!', '06 special ! char', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['06 solr special ! char']), - ('test_special_chars_name_\\', '07 special \ char', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['07 solr special \ char']), - ('test_special_chars_name_()', '08 special (char)', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['08 solr special (char)']), - ('test_special_chars_name_"', '09 special " char"', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['09 solr special " char"']), - ('test_special_chars_name_~', '10 special ~ char', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['10 solr special ~ char']), - ('test_special_chars_name_*', '11 special* char', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['11 solr special* char']), - ('test_special_chars_name_?', '12 special? char', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['12 solr special? 
char']), - ('test_special_chars_name_/', '13 special / char', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['13 solr special / char']), - ('test_special_chars_name_X', 'many special =&{}^%`#|<>,.@$;_chars', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['many special =&{}^%`#|<>,.@$;_chars']), - ('test_special_operators_OR', 'special OR operator', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['special OR AND NOT operators']), - ('test_special_operators_AND', 'special AND operator', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['special OR AND NOT operators']), - ('test_special_operators_NOT', 'special NOT operator', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['special OR AND NOT operators']), - ('test_accented_character_1', 'EBENISTERIE', SolrField.NAME_SINGLE.value, True, SolrField.NAME.value, ['DIVINE ÉBÉNISTERIE INC.']), - ('test_accented_character_2', 'EBENISTERIE', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['DIVINE ÉBÉNISTERIE INC.']), - ('test_accented_character_4', 'EBENISTERIE', SolrField.NAME_STEM_AGRO.value, True, SolrField.NAME.value, ['DIVINE ÉBÉNISTERIE INC.']), - ('test_+&and_+_1', 'special + match', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['special and match 1', 'special + match 2', 'special+match 3', 'special & match 4', 'special&match 5']), - ('test_+&and_+_2', 'special+match', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['special and match 1', 'special + match 2', 'special+match 3', 'special & match 4', 'special&match 5']), - ('test_+&and_&_1', 'special & match', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['special and match 1', 'special + match 2', 'special+match 3', 'special & match 4', 'special&match 5']), - ('test_+&and_&_2', 'special&match', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['special and match 1', 'special + match 2', 'special+match 3', 'special & match 4', 'special&match 5']), - ('test_+&and_and', 'special and match', SolrField.NAME_Q.value, True, 
SolrField.NAME.value, ['special and match 1', 'special + match 2', 'special+match 3', 'special & match 4', 'special&match 5']), - ('test_dash_1', 'special-dash match', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['special-dash match 1', 'special - dash match 2', 'special dash match 3']), - ('test_dash_2', 'special - dash match', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['special-dash match 1', 'special - dash match 2', 'special dash match 3']), - ('test_dash_3', 'special dash match', SolrField.NAME_Q.value, True, SolrField.NAME.value, ['special-dash match 1', 'special - dash match 2', 'special dash match 3']), -]) -def test_solr_query(app, test_name, query, query_field, base_fields, expected_field, expected): - """Assert that solr query call works as expected.""" - # setup - search_solr.init_app(app) - search_solr.delete_all_docs() - search_solr.create_or_replace_docs(SOLR_TEST_DOCS) - time.sleep(1) # wait for solr to register update - query = {'value': Solr.prep_query_str(query)} - search_params = Solr.build_split_query(query, [query_field, SolrField.NAME_STEM_AGRO.value], [SolrField.NAME_Q.value]) - search_params['fl'] = search_solr.base_fields if base_fields else search_solr.party_fields - # call select - resp = search_solr.query(search_params, 0, 10) - docs = resp['response']['docs'] - # test - assert len(docs) == len(expected) - for doc in docs: - assert doc[expected_field] in expected diff --git a/search-api/tests/unit/utils/__init__.py b/search-api/tests/unit/utils/__init__.py index 74e35fc9..4d71bd5b 100644 --- a/search-api/tests/unit/utils/__init__.py +++ b/search-api/tests/unit/utils/__init__.py @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Tests for the api utils module.""" +from search_api.services.business_solr.doc_fields import BusinessField, PartyField +from search_api.services.business_solr.doc_models import BusinessDoc, PartyDoc + SOLR_UPDATE_REQUEST_TEMPLATE_CORP = { "business": { @@ -50,3 +53,64 @@ "roles": [{"roleType": "proprietor"}] }] } + +def create_solr_doc(identifier, name, state, legal_type, bn=None, parties=None, goodStanding=None) -> BusinessDoc: + solr_parties = None + if parties: + solr_parties = [] + for index, party in enumerate(parties): + party_doc = { + PartyField.UNIQUE_KEY.value: identifier + '_' + str(index), + PartyField.PARENT_BN.value: bn, + PartyField.PARENT_IDENTIFIER.value: identifier, + PartyField.PARENT_NAME.value: name, + PartyField.PARENT_STATE.value: state, + PartyField.PARENT_TYPE.value: legal_type, + PartyField.PARTY_NAME.value: party[0], + PartyField.PARTY_ROLE.value: [party[1]], + PartyField.PARTY_TYPE.value: party[2], + } + solr_parties.append(PartyDoc(**party_doc)) + return BusinessDoc( + id=identifier, + identifier=identifier, + legalType=legal_type, + name=name, + status=state, + goodStanding=goodStanding, + bn=bn, + parties=solr_parties + ) + + +SOLR_TEST_DOCS = [ + create_solr_doc('CP1234567', 'business one 1', 'ACTIVE', 'CP', 'BN00012334', None, True), + create_solr_doc('CP0234567', 'business two 2', 'HISTORICAL', 'CP', '09876K', None, True), + create_solr_doc('CP0034567', 'business three 3', 'ACTIVE', 'CP', None, None, True), + create_solr_doc('BC0004567', 'business four 4', 'ACTIVE', 'BEN', '00987766800988', None, False), + create_solr_doc('BC0000567', 'business five 5', 'HISTORICAL', 'BC', 'BN9000776557', [('test si', 'significant individual', 'person')]), + create_solr_doc('BC0000067', 'business six 6 special and match', 'ACTIVE', 'BEN', '242217'), + create_solr_doc('BC0000007', 'business seven 7 special & match', 'ACTIVE', 'BEN', '124221'), + create_solr_doc('BC0020047', 'business eight 8 special&match', 'ACTIVE', 'BEN', '1255323221'), + 
create_solr_doc('FM1000028', 'firm nine 9 special + match', 'ACTIVE', 'SP', '123', [('person one', 'proprietor', 'person')]), + create_solr_doc('FM1001118', 'firm ten 10 special+match', 'ACTIVE', 'GP', None, [('organization one', 'partner', 'organization')]), + create_solr_doc('FM0004018', 'firm eleven 11 periods y.z. xk', 'ACTIVE', 'GP', None, [('organization two y.z. xk', 'partner', 'organization'), ('person two', 'partner', 'person')]), + create_solr_doc('BC0030023', 'business twelve 12 special-match', 'ACTIVE', 'BEN', '123456785BC0001'), + create_solr_doc('BC0030024', 'business thirteen 13 special - match', 'ACTIVE', 'BEN', '123456786BC0001'), + create_solr_doc('BC0030014', 'b!u(si)ness fou}l{rt-een ~`@#$%^-_=[]|\\;:\'",<>./', 'ACTIVE', 'BEN', '123456776BC0001'), + create_solr_doc('BC0030001', '01 solr special && char', 'ACTIVE', 'BEN', '123456789BC0001'), + create_solr_doc('BC0030002', '02 solr special || char', 'ACTIVE', 'BEN', '123456788BC0001'), + create_solr_doc('BC0030003', '03 solr special: char', 'ACTIVE', 'BEN', '123456787BC0001'), + create_solr_doc('BC0030004', '04 solr special + char', 'ACTIVE', 'BEN', '123456786BC0001'), + create_solr_doc('BC0030005', '05 solr special - char', 'ACTIVE', 'BEN', '123456785BC0001'), + create_solr_doc('BC0030006', '06 solr special ! char', 'ACTIVE', 'BEN', '123456784BC0001'), + create_solr_doc('BC0030007', '07 solr special \ char', 'ACTIVE', 'BEN', '123456783BC0001'), + create_solr_doc('BC0030008', '08 solr special (char)', 'ACTIVE', 'BEN', '123456782BC0001'), + create_solr_doc('BC0030009', '09 solr special " char"', 'ACTIVE', 'BEN', '123456781BC0001'), + create_solr_doc('BC0030010', '10 solr special ~ char', 'ACTIVE', 'BEN', '123456780BC0001'), + create_solr_doc('BC0030011', '11 solr special* char', 'ACTIVE', 'BEN', '123456779BC0001'), + create_solr_doc('BC0030012', '12 solr special? 
char', 'ACTIVE', 'BEN', '123456778BC0001'), + create_solr_doc('BC0030013', '13 solr special / char', 'ACTIVE', 'BEN', '123456777BC0001'), + create_solr_doc('BC0030015', 'special OR AND NOT operators', 'ACTIVE', 'BEN', '123456775BC0001'), + create_solr_doc('BC0030016', 'DIVINE ÉBÉNISTERIE INC.', 'ACTIVE', 'BEN', 'BN00012388') +] diff --git a/search-solr-importer/.env.sample b/search-solr-importer/.env.sample index 77cf18ad..11b84657 100644 --- a/search-solr-importer/.env.sample +++ b/search-solr-importer/.env.sample @@ -1,31 +1,69 @@ -APP_ENV=production +POD_NAMESPACE=local REGISTRIES_SEARCH_API_VERSION=/api/v1 -SOLR_SVC_TEST_URL=http://localhost:8984/solr - -SOLR_BATCH_UPDATE_SIZE=50000 +SOLR_BATCH_UPDATE_SIZE=20000 +SOLR_BATCH_UPDATE_SIZE_SI=5 REINDEX_CORE=False +PRELOADER_JOB=True +INCLUDE_BTR_LOAD=True +INCLUDE_COLIN_LOAD=False +INCLUDE_LEAR_LOAD=True -# ##Local -POD_NAMESPACE=local -SOLR_SVC_URL=http://localhost:8983/solr -REGISTRIES_SEARCH_API_INTERNAL_URL= +BTR_BATCH_LIMIT=100000 + +# SENTRY_DSN= +SENTRY_TSR=1.0 +###Local +REGISTRIES_SEARCH_API_INTERNAL_URL=http://localhost:5000 +SOLR_SVC_LEADER_URL=http://localhost:8873/solr +SOLR_SVC_FOLLOWER_URL=http://localhost:8873/solr + +DATABASE_HOST_LEAR= DATABASE_NAME= -DATABASE_USERNAME= DATABASE_PASSWORD= -DATABASE_HOST=localhost -DATABASE_PORT=5432 +DATABASE_PORT= +DATABASE_USERNAME= + +DATABASE_HOST_BTR= +DATABASE_NAME_BTR= +DATABASE_PASSWORD_BTR= +DATABASE_PORT_BTR= +DATABASE_USERNAME_BTR= -ORACLE_DB_NAME= ORACLE_USER= ORACLE_PASSWORD= -ORACLE_HOST=localhost -ORACLE_PORT=1521 +ORACLE_DB_NAME= +ORACLE_HOST= +ORACLE_PORT= + +###DEV +KEYCLOAK_AUTH_TOKEN_URL= +BUSINESS_SEARCH_SERVICE_ACCOUNT_CLIENT_ID= +BUSINESS_SEARCH_SERVICE_ACCOUNT_SECRET= + +# DATABASE_HOST_LEAR=OCP +# DATABASE_NAME= +# DATABASE_PASSWORD= +# DATABASE_PORT= +# DATABASE_USERNAME= + +# DATABASE_LOCATION=GCP +# DATABASE_USERNAME_GCP= +# DATABASE_PASSWORD_GCP= +# DATABASE_NAME_GCP= +# DATABASE_HOST_GCP= +# DATABASE_PORT_GCP= + +# DATABASE_HOST_BTR= 
+# DATABASE_NAME_BTR= +# DATABASE_PASSWORD_BTR= +# DATABASE_PORT_BTR= +# DATABASE_USERNAME_BTR= -# ##DEV +###TEST -# ##TEST +###SANDBOX -# ##PROD +###PROD diff --git a/search-solr-importer/data_import_handler.py b/search-solr-importer/data_import_handler.py index 21f6df55..ad334fc1 100644 --- a/search-solr-importer/data_import_handler.py +++ b/search-solr-importer/data_import_handler.py @@ -1,4 +1,4 @@ -# Copyright © 2022 Province of British Columbia +# Copyright © 2023 Province of British Columbia # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,113 +11,145 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""The SEARCH API service. - -This module is the API for the BC Registries Registry Search system. -""" +"""The BOR solr data import service.""" +import gc import sys -from http import HTTPStatus -import requests from flask import current_app from search_api.exceptions import SolrException -from search_api.services import search_solr -from search_api.services.solr.solr_docs import BusinessDoc from search_solr_importer import create_app -from search_solr_importer.utils import collect_colin_data, collect_lear_data, prep_data - - -def update_solr(base_docs: list[BusinessDoc], data_name: str) -> int: - """Import data into solr.""" - count = 0 - offset = 0 - rows = current_app.config.get('BATCH_SIZE', 1000) - retry_count = 0 - erred_record_count = 0 - while count < len(base_docs) and rows > 0 and len(base_docs) - offset > 0: - batch_amount = min(rows, len(base_docs) - offset) - count += batch_amount - # send batch to solr - try: - search_solr.create_or_replace_docs(base_docs[offset:count]) - retry_count = 0 - except SolrException as err: # pylint: disable=bare-except; - current_app.logger.debug(err) - if retry_count < 3: - # retry - 
current_app.logger.debug('Failed to update solr. Trying again (%s of 3)...', retry_count + 1) - retry_count += 1 - # set count back - count -= batch_amount - continue - - # log error and skip - current_app.logger.error('Retry count exceeded for batch. Skipping batch.') - # add number of records in failed batch to the erred count - erred_record_count += (count - offset) - - offset = count - current_app.logger.debug(f'Total {data_name} base doc records imported: {count - erred_record_count}') - return count - - -def load_search_core(): # pylint: disable=too-many-statements +from search_solr_importer.utils import (collect_btr_data, collect_colin_data, collect_lear_data, + prep_data, prep_data_btr, reindex_post, reindex_prep, + reindex_recovery, resync, update_solr, update_suggester) + + +def load_search_core(): # pylint: disable=too-many-statements,too-many-locals,too-many-branches; will update """Load data from LEAR and COLIN into the search core.""" try: - colin_data_cur = collect_colin_data() - colin_data = colin_data_cur.fetchall() - current_app.logger.debug('Prepping COLIN data...') - prepped_colin_data = prep_data(colin_data, colin_data_cur, 'COLIN') - current_app.logger.debug(f'{len(prepped_colin_data)} COLIN records ready for import.') - lear_data_cur = collect_lear_data() - lear_data = lear_data_cur.fetchall() - current_app.logger.debug('Prepping LEAR data...') - prepped_lear_data = prep_data(lear_data, lear_data_cur, 'LEAR') - current_app.logger.debug(f'{len(prepped_lear_data)} LEAR records ready for import.') - if current_app.config.get('REINDEX_CORE', False): - # delete existing index - current_app.logger.debug('REINDEX_CORE set: deleting current solr index...') - search_solr.delete_all_docs() - # execute update to solr in batches - current_app.logger.debug('Importing records from COLIN...') - count = update_solr(prepped_colin_data, 'COLIN') - current_app.logger.debug('COLIN import completed.') - current_app.logger.debug('Importing records from LEAR...') - 
count += update_solr(prepped_lear_data, 'LEAR') - current_app.logger.debug('LEAR import completed.') - current_app.logger.debug(f'Total records imported: {count}') - - if not current_app.config.get('PRELOADER_JOB', False): - try: - current_app.logger.debug('Resyncing any overwritten docs during import...') - search_api_url = f'{current_app.config.get("SEARCH_API_URL")}{current_app.config.get("SEARCH_API_V1")}' - resync_resp = requests.post(url=f'{search_api_url}/internal/solr/update/resync', - json={'minutesOffset': 60}, - timeout=120) - if resync_resp.status_code != HTTPStatus.CREATED: - if resync_resp.status_code == HTTPStatus.GATEWAY_TIMEOUT: - current_app.logger.debug('Resync timed out -- check api for any individual failures.') - else: - current_app.logger.error('Resync failed with status %s', resync_resp.status_code) - current_app.logger.debug('Resync complete.') - except Exception as error: # noqa: B902 - current_app.logger.debug(error.with_traceback(None)) - current_app.logger.error('Resync failed.') - - if current_app.config.get('REINDEX_CORE', False): - current_app.logger.debug('Building suggester...') - try: - search_solr.suggest('', 1, True) - except SolrException as err: - current_app.logger.debug(f'SOLR gave status code: {err.status_code}') - if err.status_code in [HTTPStatus.BAD_GATEWAY, HTTPStatus.GATEWAY_TIMEOUT]: - current_app.logger.error('SOLR timeout most likely due to suggester build. 
' + - 'Please wait a couple minutes and then verify import ' - 'and suggester build manually in the solr admin UI.') - return - raise err - current_app.logger.debug('Suggester built.') + is_reindex = current_app.config.get('REINDEX_CORE') + is_preload = current_app.config.get('PRELOADER_JOB') + include_btr_load = current_app.config.get('INCLUDE_BTR_LOAD') + include_colin_load = current_app.config.get('INCLUDE_COLIN_LOAD') + include_lear_load = current_app.config.get('INCLUDE_LEAR_LOAD') + final_record = None + + if is_reindex and current_app.config.get('IS_PARTIAL_IMPORT'): + current_app.logger.error('Attempted reindex on partial data set.') + current_app.logger.debug('Setting reindex to False to prevent potential data loss.') + is_reindex = False + + if is_reindex: + current_app.logger.debug('---------- Pre Reindex Actions ----------') + reindex_prep(is_preload) + + try: + colin_count = 0 + if include_colin_load: + current_app.logger.debug('---------- Collecting/Importing COLIN Entities ----------') + colin_data_cur = collect_colin_data() + current_app.logger.debug('Fetching corp batch rows...') + colin_data = colin_data_cur.fetchall() + colin_data_cur.close() + # NB: need full data set under each corp num to collapse parties properly + current_app.logger.debug('********** Mapping COLIN Entities **********') + prepped_colin_data = prep_data(colin_data, + [desc[0].lower() for desc in colin_data_cur.description], + 'COLIN') + current_app.logger.debug(f'COLIN businesses ready for import: {len(prepped_colin_data)}') + # execute update to solr in batches + current_app.logger.debug('********** Importing COLIN Entities **********') + colin_count = update_solr(prepped_colin_data, 'COLIN') + # free up memory + final_record = [prepped_colin_data[-1]], 'COLIN', False + del colin_data, prepped_colin_data + gc.collect() + current_app.logger.debug(f'COLIN import completed. 
Total COLIN businesses imported: {colin_count}.') + + lear_count = 0 + if include_lear_load: + current_app.logger.debug('---------- Collecting LEAR Entities ----------') + lear_data_cur = collect_lear_data() + lear_data = lear_data_cur.fetchall() + + current_app.logger.debug('---------- Mapping LEAR data ----------') + prepped_lear_data = prep_data( + data=lear_data, + data_descs=[desc[0].lower() for desc in lear_data_cur.description], + source='LEAR', + ) + current_app.logger.debug(f'{len(prepped_lear_data)} LEAR records ready for import.') + + # execute update to solr in batches + current_app.logger.debug('---------- Importing LEAR entities ----------') + lear_count = update_solr(prepped_lear_data, 'LEAR') + # free up memory + final_record = [prepped_lear_data[-1]], 'LEAR', False + del lear_data, prepped_lear_data + gc.collect() + current_app.logger.debug(f'LEAR import completed. Total LEAR businesses imported: {lear_count}') + + current_app.logger.debug(f'Total businesses imported: {colin_count + lear_count}') + + total_btr_count = 0 + if include_btr_load: + current_app.logger.debug('---------- Collecting/Importing BTR Data ----------') + btr_fetch_count = 0 + batch_limit = current_app.config.get('BTR_BATCH_LIMIT') + loop_count = 0 + + while loop_count < 100: # NOTE: should never get to this condition + loop_count += 1 + current_app.logger.debug('********** Collecting BTR data **********') + btr_data_cur = collect_btr_data(batch_limit, btr_fetch_count) + btr_data = btr_data_cur.fetchall() + btr_fetch_count += len(btr_data) + btr_data_cur.close() + if not btr_data: + break + + current_app.logger.debug('********** Mapping BTR data **********') + prepped_btr_data = prep_data_btr(btr_data) + current_app.logger.debug(f'{len(prepped_btr_data)} BTR records ready for import.') + + current_app.logger.debug('********** Importing BTR entities **********') + total_btr_count += update_solr(prepped_btr_data, 'BTR', True) + current_app.logger.debug(f'BTR batch import 
completed. Records imported: {total_btr_count}.') + # free up memory + final_record = [prepped_btr_data[-1]], 'BTR', True + del btr_data, prepped_btr_data + gc.collect() + + current_app.logger.debug(f'BTR import completed. Total BTR partial records imported: {total_btr_count}') + + except Exception as err: # noqa: B902 + if is_reindex and not is_preload: + reindex_recovery() + raise err # pass along + + try: + current_app.logger.debug('---------- Resync ----------') + resync() + except Exception as error: # noqa: B902 + current_app.logger.debug(error.with_traceback(None)) + current_app.logger.error('Resync failed.') + + try: + current_app.logger.debug('---------- Final Commit ----------') + current_app.logger.debug('Triggering final commit on leader to make changes visible to search...') + update_solr(final_record[0], final_record[1], final_record[2]) + current_app.logger.debug('Final commit complete.') + + except Exception as error: # noqa: B902 + current_app.logger.debug(error.with_traceback(None)) + current_app.logger.error('Final commit failed. 
(This will only effect DEV).') + + if is_reindex: + current_app.logger.debug('---------- Post Reindex Actions ----------') + update_suggester() + if not is_preload: + reindex_post() + current_app.logger.debug('SOLR import finished successfully.') except SolrException as err: diff --git a/search-solr-importer/devops/vaults.json b/search-solr-importer/devops/vaults.json index 093c0f8e..c02d1b79 100644 --- a/search-solr-importer/devops/vaults.json +++ b/search-solr-importer/devops/vaults.json @@ -21,6 +21,13 @@ "registries-search-solr-importer" ] }, + { + "vault": "keycloak", + "application": [ + "base", + "business-search-service-account" + ] + }, { "vault": "sentry", "application": [ diff --git a/search-solr-importer/src/search_solr_importer/__init__.py b/search-solr-importer/src/search_solr_importer/__init__.py index 9acd73d6..04f1f220 100644 --- a/search-solr-importer/src/search_solr_importer/__init__.py +++ b/search-solr-importer/src/search_solr_importer/__init__.py @@ -20,7 +20,7 @@ import sentry_sdk # noqa: I001; pylint: disable=ungrouped-imports; conflicts with Flake8 from sentry_sdk.integrations.flask import FlaskIntegration # noqa: I001 from flask import Flask # noqa: I001 -from search_api.services import search_solr +from search_api.services import business_solr from search_solr_importer.config import config from search_solr_importer.logging import setup_logging @@ -69,7 +69,7 @@ def create_app(config_name: str = os.getenv('APP_ENV') or 'production'): # db.init_app(app) oracle_db.init_app(app) - search_solr.init_app(app) + business_solr.init_app(app) babel.init_app(app) register_shellcontext(app) diff --git a/search-solr-importer/src/search_solr_importer/config.py b/search-solr-importer/src/search_solr_importer/config.py index fbd82aba..72c88a30 100644 --- a/search-solr-importer/src/search_solr_importer/config.py +++ b/search-solr-importer/src/search_solr_importer/config.py @@ -40,7 +40,12 @@ class Config(): # pylint: disable=too-few-public-methods 
PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__)) - SOLR_SVC_URL = os.getenv('SOLR_SVC_URL', 'http://') + SOLR_SVC_LEADER_CORE = os.getenv('SOLR_SVC_LEADER_CORE', 'business') + SOLR_SVC_FOLLOWER_CORE = os.getenv('SOLR_SVC_FOLLOWER_CORE', 'business_follower') + SOLR_SVC_LEADER_URL = os.getenv('SOLR_SVC_LEADER_URL', 'http://localhost:8873/solr') + SOLR_SVC_FOLLOWER_URL = os.getenv('SOLR_SVC_FOLLOWER_URL', 'http://localhost:8874/solr') + HAS_FOLLOWER = SOLR_SVC_FOLLOWER_URL != SOLR_SVC_LEADER_URL + SEARCH_API_URL = os.getenv('REGISTRIES_SEARCH_API_INTERNAL_URL', 'http://') SEARCH_API_V1 = os.getenv('REGISTRIES_SEARCH_API_VERSION', '') @@ -56,6 +61,31 @@ class Config(): # pylint: disable=too-few-public-methods MODERNIZED_LEGAL_TYPES = os.getenv('MODERNIZED_LEGAL_TYPES', 'BEN,CBEN,CP,GP,SP').upper().split(',') + BATCH_SIZE_SOLR = int(os.getenv('SOLR_BATCH_UPDATE_SIZE', '1000')) + BATCH_SIZE_SOLR_SI = int(os.getenv('SOLR_BATCH_UPDATE_SIZE_SI', '1000')) + REINDEX_CORE = os.getenv('REINDEX_CORE', 'False') == 'True' + PRELOADER_JOB = os.getenv('PRELOADER_JOB', 'False') == 'True' + INCLUDE_BTR_LOAD = os.getenv('INCLUDE_BTR_LOAD', 'False') == 'True' + INCLUDE_COLIN_LOAD = os.getenv('INCLUDE_COLIN_LOAD', 'True') == 'True' + INCLUDE_LEAR_LOAD = os.getenv('INCLUDE_LEAR_LOAD', 'True') == 'True' + RESYNC_OFFSET = os.getenv('RESYNC_OFFSET', '130') + + BTR_BATCH_LIMIT = int(os.getenv('BTR_BATCH_LIMIT', '100000')) + + MODERNIZED_LEGAL_TYPES = os.getenv('MODERNIZED_LEGAL_TYPES', 'BEN,CBEN,CP,GP,SP').upper().split(',') + + # TODO: or not include btr + IS_PARTIAL_IMPORT = not INCLUDE_COLIN_LOAD or not INCLUDE_LEAR_LOAD + + # Service account details + ACCOUNT_SVC_AUTH_URL = os.getenv('KEYCLOAK_AUTH_TOKEN_URL') + ACCOUNT_SVC_CLIENT_ID = os.getenv('BUSINESS_SEARCH_SERVICE_ACCOUNT_CLIENT_ID') + ACCOUNT_SVC_CLIENT_SECRET = os.getenv('BUSINESS_SEARCH_SERVICE_ACCOUNT_SECRET') + try: + ACCOUNT_SVC_TIMEOUT = int(os.getenv('AUTH_API_TIMEOUT', '20')) + except: # pylint: 
disable=bare-except; # noqa: B901, E722 + ACCOUNT_SVC_TIMEOUT = 20 + # ORACLE - CDEV/CTST/CPRD ORACLE_USER = os.getenv('ORACLE_USER', '') ORACLE_PASSWORD = os.getenv('ORACLE_PASSWORD', '') @@ -70,7 +100,7 @@ class Config(): # pylint: disable=too-few-public-methods DB_USER = os.getenv('DATABASE_USERNAME', '') DB_PASSWORD = os.getenv('DATABASE_PASSWORD', '') DB_NAME = os.getenv('DATABASE_NAME', '') - DB_HOST = os.getenv('DATABASE_HOST', '') + DB_HOST = os.getenv('DATABASE_HOST_LEAR', '') DB_PORT = os.getenv('DATABASE_PORT', '5432') if DB_LOCATION == 'GCP': @@ -80,10 +110,11 @@ class Config(): # pylint: disable=too-few-public-methods DB_HOST = os.getenv('DATABASE_HOST_GCP', '') DB_PORT = os.getenv('DATABASE_PORT_GCP', '5432') - if DB_UNIX_SOCKET := os.getenv('DATABASE_UNIX_SOCKET', None): - SQLALCHEMY_DATABASE_URI = f'postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@/{DB_NAME}?host={DB_UNIX_SOCKET}' - else: - SQLALCHEMY_DATABASE_URI = f'postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}' + BTR_DB_USER = os.getenv('DATABASE_USERNAME_BTR', '') + BTR_DB_PASSWORD = os.getenv('DATABASE_PASSWORD_BTR', '') + BTR_DB_NAME = os.getenv('DATABASE_NAME_BTR', '') + BTR_DB_HOST = os.getenv('DATABASE_HOST_BTR', '') + BTR_DB_PORT = os.getenv('DATABASE_PORT_BTR', '5432') # Connection pool settings DB_MIN_POOL_SIZE = os.getenv('DATABASE_MIN_POOL_SIZE', '2') diff --git a/search-solr-importer/src/search_solr_importer/utils/__init__.py b/search-solr-importer/src/search_solr_importer/utils/__init__.py index 827b06b0..af6d8c59 100644 --- a/search-solr-importer/src/search_solr_importer/utils/__init__.py +++ b/search-solr-importer/src/search_solr_importer/utils/__init__.py @@ -12,5 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Manages util functions for the importer.""" -from .data_collection import collect_colin_data, collect_lear_data -from .data_parsing import prep_data +from .data_collection import collect_btr_data, collect_colin_data, collect_lear_data +from .data_parsing import prep_data, prep_data_btr +from .reindex import reindex_post, reindex_prep, reindex_recovery +from .update_solr import resync, update_solr, update_suggester diff --git a/search-solr-importer/src/search_solr_importer/utils/data_collection.py b/search-solr-importer/src/search_solr_importer/utils/data_collection.py index 3541c870..cde0d127 100644 --- a/search-solr-importer/src/search_solr_importer/utils/data_collection.py +++ b/search-solr-importer/src/search_solr_importer/utils/data_collection.py @@ -134,3 +134,26 @@ def _collect_lear_data_gcp(): AND er.cessation_date IS NULL """) return cur + + +def collect_btr_data(limit: int = None, offset: int = None): + """Collect data from BTR.""" + limit_clause = '' + if limit: + limit_clause = f'LIMIT {limit}' + if offset: + limit_clause += f' OFFSET {offset}' + if limit_clause: + # NOTE: needed in order to make sure we get every record when doing batch loads + limit_clause = f'ORDER BY id {limit_clause}' + + current_app.logger.debug('Connecting to BTR GCP Postgres instance...') + conn = psycopg2.connect(host=current_app.config.get('BTR_DB_HOST'), + port=current_app.config.get('BTR_DB_PORT'), + database=current_app.config.get('BTR_DB_NAME'), + user=current_app.config.get('BTR_DB_USER'), + password=current_app.config.get('BTR_DB_PASSWORD')) + cur = conn.cursor() + current_app.logger.debug('Collecting BTR data...') + cur.execute(f'SELECT payload FROM submission {limit_clause}') + return cur diff --git a/search-solr-importer/src/search_solr_importer/utils/data_parsing.py b/search-solr-importer/src/search_solr_importer/utils/data_parsing.py index 4352d889..cee4a1d5 100644 --- a/search-solr-importer/src/search_solr_importer/utils/data_parsing.py +++ 
b/search-solr-importer/src/search_solr_importer/utils/data_parsing.py @@ -16,7 +16,7 @@ from datedelta import datedelta from flask import current_app -from search_api.services.solr.solr_docs import BusinessDoc, PartyDoc +from search_api.services.business_solr.doc_fields import PartyField from search_solr_importer.enums import ColinPartyTypeCode @@ -65,49 +65,52 @@ def _is_good_standing(item_dict: dict, source: str) -> bool: # pylint: disable= return None -def prep_data(data: list, cur, source: str) -> list[BusinessDoc]: # pylint: disable=too-many-branches, too-many-locals +def _get_business_name(doc_info: dict) -> str: + """Return the parsed name of the business in the given doc info.""" + if doc_info['legal_name']: + return doc_info['legal_name'].strip() + return doc_info.get('legal_name_alt', '').strip() + + +def _get_party_name(doc_info: dict) -> str: + """Return the parsed name of the party in the given doc info.""" + if doc_info['organization_name']: + return doc_info['organization_name'].strip() + if doc_info.get('organization_name_alt'): + return doc_info['organization_name_alt'].strip() + if doc_info.get('organization_name_colin'): + return doc_info['organization_name_colin'].strip() + person_name = '' + if doc_info['first_name']: + person_name += doc_info['first_name'].strip() + if doc_info['middle_initial']: + person_name += ' ' + doc_info['middle_initial'].strip() + if doc_info['last_name']: + person_name += ' ' + doc_info['last_name'].strip() + return person_name.strip() + + +def _get_party_role(type_cd: str, legal_type: str) -> str: + """Return the lear party_type given the colin party type code.""" + if type_cd == ColinPartyTypeCode.DIRECTOR: + return 'director' + if type_cd == ColinPartyTypeCode.FIRM_COMP_PARTY: + return 'completing_party' + if type_cd == ColinPartyTypeCode.INCORPORATOR: + return 'incorporator' + if type_cd in [ColinPartyTypeCode.FIRM_BUS_OWNER.value, ColinPartyTypeCode.FIRM_IND_OWNER.value]: + if legal_type == 'SP': + return 
'proprietor' + return 'partner' + return 'unknown' + + +def prep_data(data: list, data_descs: list[str], source: str) -> list[dict]: # pylint: disable=too-many-branches """Return the list of BusinessDocs for the given raw db data.""" prepped_data = {} - def get_business_name(doc_info: dict) -> str: - """Return the parsed name of the business in the given doc info.""" - if doc_info['legal_name']: - return doc_info['legal_name'].strip() - return doc_info.get('legal_name_alt', '').strip() - - def get_party_name(doc_info: dict) -> str: - """Return the parsed name of the party in the given doc info.""" - if doc_info['organization_name']: - return doc_info['organization_name'].strip() - if doc_info.get('organization_name_alt'): - return doc_info['organization_name_alt'].strip() - if doc_info.get('organization_name_colin'): - return doc_info['organization_name_colin'].strip() - person_name = '' - if doc_info['first_name']: - person_name += doc_info['first_name'].strip() - if doc_info['middle_initial']: - person_name += ' ' + doc_info['middle_initial'].strip() - if doc_info['last_name']: - person_name += ' ' + doc_info['last_name'].strip() - return person_name.strip() - - def get_party_role(type_cd: str, legal_type: str) -> str: - """Return the lear party_type given the colin party type code.""" - if type_cd == ColinPartyTypeCode.DIRECTOR: - return 'director' - if type_cd == ColinPartyTypeCode.FIRM_COMP_PARTY: - return 'completing_party' - if type_cd == ColinPartyTypeCode.INCORPORATOR: - return 'incorporator' - if type_cd in [ColinPartyTypeCode.FIRM_BUS_OWNER.value, ColinPartyTypeCode.FIRM_IND_OWNER.value]: - if legal_type == 'SP': - return 'proprietor' - return 'partner' - return 'unknown' - for item in data: - item_dict = dict(zip([x[0].lower() for x in cur.description], item)) + item_dict = dict(zip(data_descs, item)) # NOTE: if a business has > 1 restoration filing it will have a record per restoration # - code will ignore duplicates below (expects most relevant 
restoration to come first) base_doc_already_added = item_dict['identifier'] in prepped_data @@ -116,7 +119,7 @@ def get_party_role(type_cd: str, legal_type: str) -> str: if party_id and source == 'COLIN': # prep party fields if not item_dict.get('role'): - item_dict['role'] = get_party_role(item_dict.get('party_typ_cd'), item_dict['legal_type']) + item_dict['role'] = _get_party_role(item_dict.get('party_typ_cd'), item_dict['legal_type']) if not item_dict.get('party_type'): item_dict['party_type'] = 'organization' if item_dict['organization_name'] else 'person' @@ -132,9 +135,9 @@ def get_party_role(type_cd: str, legal_type: str) -> str: prepped_data[item_dict['identifier']]['parties'][party_id] = { 'parentBN': item_dict['tax_id'], 'parentLegalType': item_dict['legal_type'], - 'parentName': get_business_name(item_dict), + 'parentName': _get_business_name(item_dict), 'parentStatus': item_dict['state'], - 'partyName': get_party_name(item_dict), + 'partyName': _get_party_name(item_dict), 'partyRoles': [item_dict['role']], 'partyType': item_dict['party_type'] } @@ -147,21 +150,23 @@ def get_party_role(type_cd: str, legal_type: str) -> str: prepped_data[identifier] = { 'goodStanding': _is_good_standing(item_dict, source), 'legalType': item_dict['legal_type'], + 'id': identifier, 'identifier': identifier, - 'name': get_business_name(item_dict), + 'name': _get_business_name(item_dict), 'status': item_dict['state'], - 'bn': item_dict['tax_id'], - 'parties': {} + 'bn': item_dict['tax_id'] } if party_id: # add party doc to base doc prepped_data[identifier]['parties'] = { party_id: { + 'id': f'{identifier}_{party_id}', 'parentBN': item_dict['tax_id'], + 'parentIdentifier': identifier, 'parentLegalType': item_dict['legal_type'], - 'parentName': get_business_name(item_dict), + 'parentName': _get_business_name(item_dict), 'parentStatus': item_dict['state'], - 'partyName': get_party_name(item_dict), + 'partyName': _get_party_name(item_dict), 'partyRoles': [item_dict['role']], 
'partyType': item_dict['party_type'] } @@ -172,7 +177,42 @@ def get_party_role(type_cd: str, legal_type: str) -> str: if base_doc.get('parties'): flattened_parties = [] for party_key in base_doc['parties']: - flattened_parties.append(PartyDoc(**base_doc['parties'][party_key])) - base_doc['parties'] = flattened_parties - solr_docs.append(BusinessDoc(**base_doc)) + if party := base_doc['parties'][party_key]: + flattened_parties.append(party) + if flattened_parties: + base_doc['parties'] = flattened_parties + solr_docs.append(base_doc) return solr_docs + + +def prep_data_btr(data: list[dict]) -> list[dict]: + """Return the list of partial business docs containing the SI party information.""" + prepped_data: list[dict] = [] + + for item in data: + submission = item[0] + identifier = submission['businessIdentifier'] + + business = {'id': identifier, 'parties': {'add': []}} + + # collect current SIs. + for person in submission.get('personStatements', []): + party_name = '' + for name in person.get('names'): + if name.get('type') == 'individual': # expecting this to be 'individual' or 'alternative' + party_name = name.get('fullName') + break + if not party_name: + current_app.logger.debug('Person names: %s', person.get('names')) + current_app.logger.error('Error parsing SI name for %s', identifier) + + business['parties']['add'].append({ + PartyField.UNIQUE_KEY.value: identifier + '_' + person['uuid'], + PartyField.PARTY_NAME.value: party_name, + PartyField.PARTY_ROLE.value: ['significant individual'], + PartyField.PARENT_TYPE.value: 'person' + }) + + prepped_data.append(business) + + return prepped_data diff --git a/search-solr-importer/src/search_solr_importer/utils/reindex.py b/search-solr-importer/src/search_solr_importer/utils/reindex.py new file mode 100644 index 00000000..be611e0a --- /dev/null +++ b/search-solr-importer/src/search_solr_importer/utils/reindex.py @@ -0,0 +1,124 @@ +# Copyright © 2023 Province of British Columbia +# +# Licensed under the Apache 
License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Manages util methods for reindexing.""" +from datetime import datetime, timezone +from http import HTTPStatus +from time import sleep + +from flask import current_app +from search_api.exceptions import SolrException +from search_api.services import business_solr + + +def get_replication_detail(field: str, leader: bool): + """Return the replication detail for the core.""" + details: dict = (business_solr.replication('details', leader)).json()['details'] + # remove data unwanted in the logs + if field != 'commits' and 'commits' in details: + del details['commits'] + if not leader and field != 'leaderDetails' and 'leaderDetails' in details['follower']: + del details['follower']['leaderDetails'] + + # log full details and return data element + current_app.logger.debug('Full replication details: %s', details) + if leader: + return details.get(field) + return details['follower'].get(field) + + +def reindex_prep(is_preload: bool): + """Execute reindex operations needed before index is reloaded.""" + if not is_preload: + # backup leader index + backup_trigger_time = (datetime.utcnow()).replace(tzinfo=timezone.utc) + backup = business_solr.replication('backup', True) + current_app.logger.debug(backup.json()) + if current_app.config.get('HAS_FOLLOWER', True): + # disable follower polling during reindex + disable_polling = business_solr.replication('disablepoll', False) + current_app.logger.debug(disable_polling.json()) + # await 60 seconds in case a poll was 
in progress and to give time for backup to complete + current_app.logger.debug('Pausing 60s for SOLR to complete reindex prep...') + sleep(60) + # verify current backup is from just now and was successful in case of failure + current_app.logger.debug('Verifying SOLR reindex prep...') + backup_succeeded = False + for i in range(20): + current_app.logger.debug(f'Checking new backup {i + 1} of 20...') + if backup_detail := get_replication_detail('backup', True): + backup_start_time = datetime.fromisoformat(backup_detail['startTime']) + if backup_detail['status'] == 'success' and backup_trigger_time < backup_start_time: + backup_succeeded = True + break + # retry repeatedly (new backup in prod will take a couple minutes) + sleep(30 + (i*2)) + if not backup_succeeded: + raise SolrException('Failed to backup leader index', HTTPStatus.INTERNAL_SERVER_ERROR) + current_app.logger.debug('Backup succeeded. Checking polling disabled...') + if current_app.config.get('HAS_FOLLOWER', True): + # verify follower polling disabled so it doesn't update until reindex is complete + is_polling_disabled = get_replication_detail('isPollingDisabled', False) + if not bool(is_polling_disabled): + current_app.logger.debug('is_polling_disabled: %s', is_polling_disabled) + raise SolrException('Failed disable polling on follower', + str(is_polling_disabled), + HTTPStatus.INTERNAL_SERVER_ERROR) + current_app.logger.debug('Polling disabled. 
Disabling leader replication...') + # disable leader replication for reindex duration (important to do this after polling disabled) + disable_replication = business_solr.replication('disablereplication', True) + current_app.logger.debug(disable_replication.json()) + + # delete existing index + current_app.logger.debug('REINDEX_CORE set: deleting current solr index...') + business_solr.delete_all_docs() + + +def reindex_post(): + """Execute post reindex operations on the follower index.""" + if current_app.config.get('HAS_FOLLOWER', True): + # re-enable leader replication + enable_replication = business_solr.replication('enablereplication', True) + current_app.logger.debug(enable_replication.json()) + sleep(5) + # force the follower to fetch the new index + fetch_new_idx = business_solr.replication('fetchindex', False) + current_app.logger.debug(fetch_new_idx.json()) + sleep(10) + # re-enable polling + enable_polling = business_solr.replication('enablepoll', False) + current_app.logger.debug(enable_polling.json()) + + +def reindex_recovery(): + """Restore the index on the leader and re-enable polling on the follower.""" + restore = business_solr.replication('restore', True) + current_app.logger.debug(restore.json()) + current_app.logger.debug('awaiting restore completion...') + for i in range(100): + current_app.logger.debug(f'Checking restore status ({i + 1} of 100)...') + status = business_solr.replication('restorestatus', True) + current_app.logger.debug(status) + current_app.logger.debug(status.json()) + if (status.json())['restorestatus']['status'] == 'success': + current_app.logger.debug('restore complete.') + enable_replication = business_solr.replication('enablereplication', True) + current_app.logger.debug(enable_replication.json()) + sleep(5) + enable_polling = business_solr.replication('enablepoll', False) + current_app.logger.debug(enable_polling.json()) + return + if (status.json())['status'] == 'failed': + break + sleep(10 + (i*2)) +
current_app.logger.error('Possible failure to restore leader index. Manual intervention required.') diff --git a/search-solr-importer/src/search_solr_importer/utils/update_solr.py b/search-solr-importer/src/search_solr_importer/utils/update_solr.py new file mode 100644 index 00000000..3100fd8c --- /dev/null +++ b/search-solr-importer/src/search_solr_importer/utils/update_solr.py @@ -0,0 +1,139 @@ +# Copyright © 2023 Province of British Columbia +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Manages util methods for updating business solr via the reg search api.""" +import time +from http import HTTPStatus + +import requests +from flask import current_app +from search_api.exceptions import SolrException +from search_api.services import business_solr +from search_api.services.authz import get_bearer_token + + +def _get_wait_interval(err: Exception): + """Return the base wait interval for the exception.""" + if isinstance(err.args, (tuple, list)) and err.args and isinstance(err.args[0], dict): + if '408' in err.args[0].get('error', {}).get('detail', ''): + # increased base wait time for solr 408 error + return 60 + return 20 + + +def update_solr(docs: list[dict], data_name: str, partial=False) -> int: + """Import data into solr.""" + current_app.logger.debug('Getting token for Import...') + token = get_bearer_token() + headers = {'Authorization': 'Bearer ' + token} + current_app.logger.debug('Token set.') + api_url = f'{current_app.config.get("SEARCH_API_URL")}{current_app.config.get("SEARCH_API_V1")}' + count = 0 + offset = 0 + rows = current_app.config.get('BATCH_SIZE_SOLR', 1000) + if data_name == 'BTR': + rows = current_app.config.get('BATCH_SIZE_SOLR_SI', 1000) + retry_count = 0 + while count < len(docs) and rows > 0 and len(docs) - offset > 0: + batch_amount = int(min(rows, len(docs) - offset) / (retry_count + 1)) + count += batch_amount + # call api import endpoint + try: + current_app.logger.debug('Importing batch...') + import_resp = requests.put(url=f'{api_url}/internal/solr/import', + headers=headers, + json={'businesses': docs[offset:count], + 'timeout': '60', + 'type': 'partial' if partial else 'full'}, + timeout=90) + + if import_resp.status_code != HTTPStatus.CREATED: + if import_resp.status_code == HTTPStatus.UNAUTHORIZED: + # renew token for next try + current_app.logger.debug('Getting new token for Import...') + token = get_bearer_token() + headers = {'Authorization': 'Bearer ' + token} + current_app.logger.debug('New Token set.') + 
# try again + raise Exception({'error': import_resp.json(), 'status_code': import_resp.status_code}) # noqa: E501; pylint: disable=broad-exception-raised + retry_count = 0 + except Exception as err: # noqa: B902; pylint: disable=bare-except; + current_app.logger.debug(err) + if retry_count < 5: + # retry + current_app.logger.debug('Failed to update solr with batch. Trying again (%s of 5)...', retry_count + 1) + retry_count += 1 + # await some time before trying again + base_wait_time = _get_wait_interval(err) + current_app.logger.debug('Awaiting %s seconds before trying again...', base_wait_time * retry_count) + time.sleep(base_wait_time * retry_count) + # set count back + count -= batch_amount + continue + if retry_count == 5: + # wait x minutes and then try one more time + current_app.logger.debug( + 'Max retries for batch exceeded. Awaiting 2 mins before trying one more time...') + time.sleep(120) + # renew token for next try + current_app.logger.debug('Getting new token for Import...') + token = get_bearer_token() + headers = {'Authorization': 'Bearer ' + token} + current_app.logger.debug('New Token set.') + # try again + retry_count += 1 + count -= batch_amount + continue + # log and raise error + current_app.logger.error('Retry count exceeded for batch.') + raise SolrException('Retry count exceeded for updating SOLR. 
Aborting import.') from err + offset = count + current_app.logger.debug(f'Total batch {data_name} doc records imported: {count}') + return count + + +def resync(): + """Resync to catch any records that had an update during the import.""" + current_app.logger.debug('Getting token for Resync...') + token = get_bearer_token() + headers = {'Authorization': 'Bearer ' + token} + + current_app.logger.debug('Resyncing any overwritten docs during import...') + api_url = f'{current_app.config.get("SEARCH_API_URL")}{current_app.config.get("SEARCH_API_V1")}' + resync_resp = requests.post(url=f'{api_url}/internal/solr/update/resync', + headers=headers, + json={'minutesOffset': current_app.config.get('RESYNC_OFFSET')}, + timeout=60) + if resync_resp.status_code != HTTPStatus.CREATED: + if resync_resp.status_code == HTTPStatus.GATEWAY_TIMEOUT: + current_app.logger.debug('Resync timed out -- check api for any individual failures.') + else: + current_app.logger.error('Resync failed: %s, %s', resync_resp.status_code, resync_resp.json()) + else: + current_app.logger.debug('Resync complete.') + + +def update_suggester(): + """Build the suggester.""" + current_app.logger.debug('Building suggester...') + try: + business_solr.suggest('', 1, True) + except SolrException as err: + current_app.logger.debug(f'SOLR gave status code: {err.status_code}') + if err.status_code in [HTTPStatus.BAD_GATEWAY, HTTPStatus.GATEWAY_TIMEOUT]: + current_app.logger.error('SOLR timeout most likely due to suggester build. 
' + + 'Please wait a couple minutes and then verify import ' + + 'and suggester build manually in the solr admin UI.') + return + raise err + current_app.logger.debug('Suggester built.') diff --git a/search-solr-importer/src/search_solr_importer/version.py b/search-solr-importer/src/search_solr_importer/version.py index 851a14cb..6f0451f7 100644 --- a/search-solr-importer/src/search_solr_importer/version.py +++ b/search-solr-importer/src/search_solr_importer/version.py @@ -22,4 +22,4 @@ Development release segment: .devN """ -__version__ = '1.3.2' # pylint: disable=invalid-name +__version__ = '1.4.0' # pylint: disable=invalid-name diff --git a/search-solr/.gitignore b/search-solr/.gitignore new file mode 100644 index 00000000..e02b98ec --- /dev/null +++ b/search-solr/.gitignore @@ -0,0 +1,2 @@ +solr/business/conf/_schema_analysis* +solr/business/data/* \ No newline at end of file diff --git a/search-solr/Dockerfile b/search-solr/Dockerfile index 425d1b46..d52bb0a7 100644 --- a/search-solr/Dockerfile +++ b/search-solr/Dockerfile @@ -1,19 +1,48 @@ -FROM bitnami/solr:9.0.0 +FROM solr:9.6.1 USER root +ARG SOLR_VERSION="9.6.1" ARG VCS_REF="missing" ARG BUILD_DATE="missing" +ARG CORE="business" +ARG FOLLOWER_CORE="business_follower" +ARG SOLR_OPTS_VAR='$SOLR_OPTS' +ARG JVM_MEM="1g" ENV VCS_REF=${VCS_REF} ENV BUILD_DATE=${BUILD_DATE} +ENV CORE=${CORE} +ENV FOLLOWER_CORE=${FOLLOWER_CORE} +ENV JVM_MEM=${JVM_MEM} LABEL org.label-schema.vcs-ref=${VCS_REF} \ org.label-schema.build-date=${BUILD_DATE} -# Copy over solr files -COPY bitnami /bitnami -RUN rm -rf bitnami/bitnami/solr/server/solr/search/data +# copy files into leader / follower configsets +COPY ./solr/${CORE}/conf /opt/solr-${SOLR_VERSION}/server/solr/configsets/${CORE}/conf +COPY ./solr/${CORE}/conf /opt/solr-${SOLR_VERSION}/server/solr/configsets/${FOLLOWER_CORE}/conf +# copy follower specific config into configset +COPY ./solr/${FOLLOWER_CORE}/conf/solrconfig.xml 
/opt/solr-${SOLR_VERSION}/server/solr/configsets/${FOLLOWER_CORE}/conf/solrconfig.xml +# copy in starting core +COPY ./solr/${CORE}/conf /var/solr/data/${CORE}/conf +COPY ./solr/${CORE}/core.properties /var/solr/data/${CORE}/core.properties +# copy core specific config into starting core +COPY ./solr/${CORE}/conf/solrconfig.xml /var/solr/data/${CORE}/conf/solrconfig.xml +# copy solr.xml (will be used by both leader and followers) +COPY ./solr/solr.xml /opt/solr-${SOLR_VERSION}/server/solr/solr.xml +# solr env overrides +RUN echo "" >> /etc/default/solr.in.sh +RUN echo SOLR_OPTS=\"$SOLR_OPTS_VAR -Dsolr.disable.allowUrls=true\" >> /etc/default/solr.in.sh +# NB: OPs flow is to set these afterwards so the same image can be tagged across dev/test/prod +# RUN echo SOLR_OPTS=\"$SOLR_OPTS_VAR -Dsolr.environment=$ENVIRONMENT\" >> /etc/default/solr.in.sh +# RUN echo SOLR_OPTS=\"$SOLR_OPTS_VAR -Dsolr.leaderUrl=$LEADER_URL\" >> /etc/default/solr.in.sh -RUN chmod -R 777 /bitnami +# jvm memory +RUN echo SOLR_JAVA_MEM=\"-Xms$JVM_MEM -Xmx$JVM_MEM\" >> /etc/default/solr.in.sh + +RUN chmod -R 777 /var/solr/data +RUN chmod -R 777 /opt/solr-${SOLR_VERSION} USER 1001 + +EXPOSE 8983 diff --git a/search-solr/Makefile b/search-solr/Makefile new file mode 100644 index 00000000..c5ecf652 --- /dev/null +++ b/search-solr/Makefile @@ -0,0 +1,57 @@ +.PHONY: build +.PHONY: build-leader +.PHONY: build-follower +.PHONY: setup + +DOCKER_NAME_LEADER:=business-solr-leader +DOCKER_NAME_FOLLOWER:=business-solr-follower +DOCKER_NAME_LOCAL:=business-solr-local + +################################################################################# +# COMMANDS -- Setup # +################################################################################# + +################################################################################# +# COMMANDS - CD # +################################################################################# +build: build-leader build-follower +build-follower: + docker build . 
-t $(DOCKER_NAME_FOLLOWER) \ + --platform linux/amd64 \ + --build-arg VCS_REF=$(shell git rev-parse --short HEAD) \ + --build-arg BUILD_DATE=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ") \ + --build-arg CORE=business_follower \ + --build-arg FOLLOWER_CORE=business_follower \ + --build-arg JVM_MEM=2g + +build-leader: + docker build . -t $(DOCKER_NAME_LEADER) \ + --platform linux/amd64 \ + --build-arg VCS_REF=$(shell git rev-parse --short HEAD) \ + --build-arg BUILD_DATE=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ") \ + --build-arg CORE=business \ + --build-arg FOLLOWER_CORE=business_follower \ + --build-arg JVM_MEM=12g + +build-local: + docker build . -t $(DOCKER_NAME_LOCAL) \ + --platform linux/amd64 \ + --build-arg VCS_REF=$(shell git rev-parse --short HEAD) \ + --build-arg BUILD_DATE=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ") \ + --build-arg CORE=business \ + --build-arg FOLLOWER_CORE=business_follower \ + --build-arg JVM_MEM=512m + +################################################################################# +# COMMANDS - Local # +################################################################################# + +################################################################################# +# Self Documenting Commands # +################################################################################# +.PHONY: help + +.DEFAULT_GOAL := help + +help: + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' diff --git a/search-solr/README.md b/search-solr/README.md index 1d03dbf6..ab5784e9 100644 --- a/search-solr/README.md +++ b/search-solr/README.md @@ -4,42 +4,37 @@ BC Registries Registry Search SOLR ## Technology Stack Used -- bitnami/solr +- Apache Solr - Docker ### Development Setup -1. Pull the bitnami solr docker image - -- `docker pull bitnami/solr:latest` - -2. 
Run your solr container - -- if first time: - - for a persistent index: `docker run -d -p 8983:8983 -v //registries-search/search-solr/bitnami:/bitnami --name solr-local bitnami/solr:9.0.0` (it will be available on port 8983) - - for a temp index (changes will not persist -- use for search-api unit tests): - - `docker build . -t solr-test` - - `docker run -it --name=solr-test -p 8999:8983 solr-test` (it will be available on port 8999) +1. Pull the base solr docker image + +- `docker pull solr:9.6.1` + +2. Run your solr containers + +- if first time or need to pick up new solr changes outside of /solr/business directory: + - Build leader image: `make build-local` + - Run leader image: `docker run -d -p 8873:8983 --name business-solr-leader-local business-solr-local` (it will be available on port 8873) + _NOTE: if you want the data to persist then add `-v $PWD/solr/business:/var/solr/data` (do NOT do this for the solr instance used for api unit tests)_ + - Optional: set up follower node + - Get leader IP: `docker inspect business-solr-leader-local | grep IPAddress` + - Use the docker IP to set the leader url: `export LEADER_URL=http://leader_IP:8873/solr/business` + - Build the follower image: `make build-follower` + - Run follower image: `docker run -d -p 8884:8984 --name business-solr-follower-local business-solr-follower` (it will be available on port 8884) + - Add docker network so that follower can poll from leader: + - `docker network create solr` + - `docker network connect solr business-solr-leader-local` + - `docker network connect solr business-solr-follower-local` - else - - `docker start solr-local` or `docker start solr-test` + - `docker start business-solr-leader-local` 3. Check logs for errors -- `docker logs solr-local` +- `docker logs business-solr-leader-local` 4. Go to admin UI in browser and check the solr core is there (it will be empty) -- http://localhost:8983/solr - -5.
Data import via the solr importer with REINDEX=True (for persistent index only) - -- see https://github.com/bcgov/registries-search/tree/main/search-solr-importer and you will need: - - run local COLIN oracle db OR setup VPN connection to COLIN dev OR comment out the COLIN load - - run local LEAR db OR port-forward to dev instance OR comment out LEAR load - -6. Stop the solr instance, make changes and reindex / rebuild the suggester - -- `docker stop solr-local` -- make changes -- `docker start solr-local` -- reimport data with REINDEX=True (5.) +- http://localhost:8873/solr diff --git a/search-solr/action.yml b/search-solr/action.yml index 4216cb16..d7315c6f 100644 --- a/search-solr/action.yml +++ b/search-solr/action.yml @@ -1,6 +1,6 @@ # action.yml -name: "Search Solr Action" -description: "Spins up a search solr container on port 8983" +name: "Entity Solr Action" +description: "Spins up a business solr container on port 8983" runs: using: "docker" image: "Dockerfile" diff --git a/search-solr/bitnami/solr/server/solr/README.md b/search-solr/bitnami/solr/server/solr/README.md deleted file mode 100644 index 04cd872b..00000000 --- a/search-solr/bitnami/solr/server/solr/README.md +++ /dev/null @@ -1,79 +0,0 @@ - - -Default Solr Home Directory -============================= - -This directory is the default Solr home directory which holds -configuration files and Solr indexes (called cores). - - -Basic Directory Structure -------------------------- - -The Solr Home directory typically contains the following... - -### solr.xml - -This is the primary configuration file Solr looks for when starting; -it specifies high-level configuration options that apply to all -of your Solr cores, such as cluster-wide SolrCloud settings like -the ZooKeeper client timeout. - -In addition, you can also declare Solr cores in this file, however -it is recommended to just use automatic core discovery instead of -listing cores in solr.xml. 
- -If no solr.xml file is found, then Solr assumes that there should be -a single SolrCore named "collection1" and that the "Instance Directory" -for collection1 should be the same as the Solr Home Directory. - -For more information about solr.xml, please see: -https://solr.apache.org/guide/solr-cores-and-solr-xml.html - -### Individual SolrCore Instance Directories - -Although solr.xml can be configured to look for SolrCore Instance Directories -in any path, simple sub-directories of the Solr Home Dir using relative paths -are common for many installations. - -### Core Discovery - -During startup, Solr will scan sub-directories of Solr home looking for -a specific file named core.properties. If core.properties is found in a -sub-directory (at any depth), Solr will initialize a core using the properties -defined in core.properties. For an example of core.properties, please see: - -example/solr/collection1/core.properties - -For more information about core discovery, please see: - -https://solr.apache.org/guide/core-discovery.html - -### A Shared 'lib' Directory - -Although solr.xml can be configured with an optional "sharedLib" attribute -that can point to any path, it is common to use a "./lib" sub-directory of the -Solr Home Directory. - -### ZooKeeper Files - -When using SolrCloud using the embedded ZooKeeper option for Solr, it is -common to have a "zoo.cfg" file and "zoo_data" directories in the Solr Home -Directory. Please see the SolrCloud documentation for more details. 
- -https://solr.apache.org/guide/solrcloud.html diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/managed-schema.xml b/search-solr/bitnami/solr/server/solr/configsets/_default/conf/managed-schema.xml deleted file mode 100644 index 8bd61ed7..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/managed-schema.xml +++ /dev/null @@ -1,1031 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - id - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/_rest_managed.json 
b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/_rest_managed.json deleted file mode 100644 index 6a4aec39..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/_rest_managed.json +++ /dev/null @@ -1 +0,0 @@ -{"initArgs":{},"managedList":[]} diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/_schema_analysis_stopwords_english.json b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/_schema_analysis_stopwords_english.json deleted file mode 100644 index a694e5c3..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/_schema_analysis_stopwords_english.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "initArgs":{"ignoreCase":true}, - "managedList":[ - "a", - "an", - "and", - "are", - "as", - "at", - "be", - "but", - "by", - "for", - "if", - "in", - "into", - "is", - "it", - "no", - "not", - "of", - "on", - "or", - "stopworda", - "stopwordb", - "such", - "that", - "the", - "their", - "then", - "there", - "these", - "they", - "this", - "to", - "was", - "will", - "with"]} diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/_schema_analysis_synonyms_english.json b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/_schema_analysis_synonyms_english.json deleted file mode 100644 index 869bdce0..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/_schema_analysis_synonyms_english.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "initArgs":{ - "ignoreCase":true, - "format":"solr" - }, - "managedMap":{ - "GB":["GiB","Gigabyte"], - "happy":["glad","joyful"], - "TV":["Television"] - } -} diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/currency.xml b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/currency.xml 
deleted file mode 100644 index 3a9c58af..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/currency.xml +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/elevate.xml b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/elevate.xml deleted file mode 100644 index b4072d04..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/elevate.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - - diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_ca.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_ca.txt deleted file mode 100644 index 307a85f9..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_ca.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Set of Catalan contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -l -m -n -s -t diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_fr.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_fr.txt deleted file mode 100644 index f1bba51b..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_fr.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Set of French contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -l -m -t -qu -n -s -j -d -c -jusqu -quoiqu -lorsqu -puisqu diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_ga.txt 
b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_ga.txt deleted file mode 100644 index 9ebe7fa3..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_ga.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Set of Irish contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -m -b diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_it.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_it.txt deleted file mode 100644 index cac04095..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/contractions_it.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Set of Italian contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -c -l -all -dall -dell -nell -sull -coll -pell -gl -agl -dagl -degl -negl -sugl -un -m -t -s -v -d diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/hyphenations_ga.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/hyphenations_ga.txt deleted file mode 100644 index 4d2642cc..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/hyphenations_ga.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Set of Irish hyphenations for StopFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -h -n -t diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stemdict_nl.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stemdict_nl.txt deleted file mode 100644 index 44107297..00000000 --- 
a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stemdict_nl.txt +++ /dev/null @@ -1,6 +0,0 @@ -# Set of overrides for the dutch stemmer -# TODO: load this as a resource from the analyzer and sync it in build.xml -fiets fiets -bromfiets bromfiets -ei eier -kind kinder diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stoptags_ja.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stoptags_ja.txt deleted file mode 100644 index 71b75084..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stoptags_ja.txt +++ /dev/null @@ -1,420 +0,0 @@ -# -# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. -# -# Any token with a part-of-speech tag that exactly matches those defined in this -# file are removed from the token stream. -# -# Set your own stoptags by uncommenting the lines below. Note that comments are -# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, -# etc. that can be useful for building you own stoptag set. -# -# The entire possible tagset is provided below for convenience. -# -##### -# noun: unclassified nouns -#名詞 -# -# noun-common: Common nouns or nouns where the sub-classification is undefined -#名詞-一般 -# -# noun-proper: Proper nouns where the sub-classification is undefined -#名詞-固有名詞 -# -# noun-proper-misc: miscellaneous proper nouns -#名詞-固有名詞-一般 -# -# noun-proper-person: Personal names where the sub-classification is undefined -#名詞-固有名詞-人名 -# -# noun-proper-person-misc: names that cannot be divided into surname and -# given name; foreign names; names where the surname or given name is unknown. -# e.g. お市の方 -#名詞-固有名詞-人名-一般 -# -# noun-proper-person-surname: Mainly Japanese surnames. -# e.g. 山田 -#名詞-固有名詞-人名-姓 -# -# noun-proper-person-given_name: Mainly Japanese given names. -# e.g. 
太郎 -#名詞-固有名詞-人名-名 -# -# noun-proper-organization: Names representing organizations. -# e.g. 通産省, NHK -#名詞-固有名詞-組織 -# -# noun-proper-place: Place names where the sub-classification is undefined -#名詞-固有名詞-地域 -# -# noun-proper-place-misc: Place names excluding countries. -# e.g. アジア, バルセロナ, 京都 -#名詞-固有名詞-地域-一般 -# -# noun-proper-place-country: Country names. -# e.g. 日本, オーストラリア -#名詞-固有名詞-地域-国 -# -# noun-pronoun: Pronouns where the sub-classification is undefined -#名詞-代名詞 -# -# noun-pronoun-misc: miscellaneous pronouns: -# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ -#名詞-代名詞-一般 -# -# noun-pronoun-contraction: Spoken language contraction made by combining a -# pronoun and the particle 'wa'. -# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ -#名詞-代名詞-縮約 -# -# noun-adverbial: Temporal nouns such as names of days or months that behave -# like adverbs. Nouns that represent amount or ratios and can be used adverbially, -# e.g. 金曜, 一月, 午後, 少量 -#名詞-副詞可能 -# -# noun-verbal: Nouns that take arguments with case and can appear followed by -# 'suru' and related verbs (する, できる, なさる, くださる) -# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り -#名詞-サ変接続 -# -# noun-adjective-base: The base form of adjectives, words that appear before な ("na") -# e.g. 健康, 安易, 駄目, だめ -#名詞-形容動詞語幹 -# -# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. -# e.g. 0, 1, 2, 何, 数, 幾 -#名詞-数 -# -# noun-affix: noun affixes where the sub-classification is undefined -#名詞-非自立 -# -# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that -# attach to the base form of inflectional words, words that cannot be classified -# into any of the other categories below. This category includes indefinite nouns. -# e.g. 
あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, -# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, -# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, -# わり, 割り, 割, ん-口語/, もん-口語/ -#名詞-非自立-一般 -# -# noun-affix-adverbial: noun affixes that that can behave as adverbs. -# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, -# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, -# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, -# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, -# 儘, 侭, みぎり, 矢先 -#名詞-非自立-副詞可能 -# -# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars -# with the stem よう(だ) ("you(da)"). -# e.g. よう, やう, 様 (よう) -#名詞-非自立-助動詞語幹 -# -# noun-affix-adjective-base: noun affixes that can connect to the indeclinable -# connection form な (aux "da"). -# e.g. みたい, ふう -#名詞-非自立-形容動詞語幹 -# -# noun-special: special nouns where the sub-classification is undefined. -#名詞-特殊 -# -# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is -# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base -# form of inflectional words. -# e.g. そう -#名詞-特殊-助動詞語幹 -# -# noun-suffix: noun suffixes where the sub-classification is undefined. -#名詞-接尾 -# -# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect -# to ガル or タイ and can combine into compound nouns, words that cannot be classified into -# any of the other categories below. In general, this category is more inclusive than -# 接尾語 ("suffix") and is usually the last element in a compound noun. -# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, -# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 -#名詞-接尾-一般 -# -# noun-suffix-person: Suffixes that form nouns and attach to person names more often -# than other nouns. -# e.g. 君, 様, 著 -#名詞-接尾-人名 -# -# noun-suffix-place: Suffixes that form nouns and attach to place names more often -# than other nouns. -# e.g. 
町, 市, 県 -#名詞-接尾-地域 -# -# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that -# can appear before スル ("suru"). -# e.g. 化, 視, 分け, 入り, 落ち, 買い -#名詞-接尾-サ変接続 -# -# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, -# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the -# conjunctive form of inflectional words. -# e.g. そう -#名詞-接尾-助動詞語幹 -# -# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive -# form of inflectional words and appear before the copula だ ("da"). -# e.g. 的, げ, がち -#名詞-接尾-形容動詞語幹 -# -# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. -# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) -#名詞-接尾-副詞可能 -# -# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category -# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach -# to numbers. -# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 -#名詞-接尾-助数詞 -# -# noun-suffix-special: Special suffixes that mainly attach to inflecting words. -# e.g. (楽し) さ, (考え) 方 -#名詞-接尾-特殊 -# -# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words -# together. -# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) -#名詞-接続詞的 -# -# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are -# semantically verb-like. -# e.g. ごらん, ご覧, 御覧, 頂戴 -#名詞-動詞非自立的 -# -# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, -# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") -# is いわく ("iwaku"). -#名詞-引用文字列 -# -# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and -# behave like an adjective. -# e.g. 申し訳, 仕方, とんでも, 違い -#名詞-ナイ形容詞語幹 -# -##### -# prefix: unclassified prefixes -#接頭詞 -# -# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) -# excluding numerical expressions. -# e.g. 
お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) -#接頭詞-名詞接続 -# -# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb -# in conjunctive form followed by なる/なさる/くださる. -# e.g. お (読みなさい), お (座り) -#接頭詞-動詞接続 -# -# prefix-adjectival: Prefixes that attach to adjectives. -# e.g. お (寒いですねえ), バカ (でかい) -#接頭詞-形容詞接続 -# -# prefix-numerical: Prefixes that attach to numerical expressions. -# e.g. 約, およそ, 毎時 -#接頭詞-数接続 -# -##### -# verb: unclassified verbs -#動詞 -# -# verb-main: -#動詞-自立 -# -# verb-auxiliary: -#動詞-非自立 -# -# verb-suffix: -#動詞-接尾 -# -##### -# adjective: unclassified adjectives -#形容詞 -# -# adjective-main: -#形容詞-自立 -# -# adjective-auxiliary: -#形容詞-非自立 -# -# adjective-suffix: -#形容詞-接尾 -# -##### -# adverb: unclassified adverbs -#副詞 -# -# adverb-misc: Words that can be segmented into one unit and where adnominal -# modification is not possible. -# e.g. あいかわらず, 多分 -#副詞-一般 -# -# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, -# な, する, だ, etc. -# e.g. こんなに, そんなに, あんなに, なにか, なんでも -#副詞-助詞類接続 -# -##### -# adnominal: Words that only have noun-modifying forms. -# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, -# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, -# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き -#連体詞 -# -##### -# conjunction: Conjunctions that can occur independently. -# e.g. が, けれども, そして, じゃあ, それどころか -接続詞 -# -##### -# particle: unclassified particles. -助詞 -# -# particle-case: case particles where the subclassification is undefined. -助詞-格助詞 -# -# particle-case-misc: Case particles. -# e.g. から, が, で, と, に, へ, より, を, の, にて -助詞-格助詞-一般 -# -# particle-case-quote: the "to" that appears after nouns, a person’s speech, -# quotation marks, expressions of decisions from a meeting, reasons, judgements, -# conjectures, etc. -# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) -助詞-格助詞-引用 -# -# particle-case-compound: Compounds of particles and verbs that mainly behave -# like case particles. -# e.g. 
という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, -# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, -# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, -# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, -# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, -# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, -# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, -# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ -助詞-格助詞-連語 -# -# particle-conjunctive: -# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, -# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, -# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ -助詞-接続助詞 -# -# particle-dependency: -# e.g. こそ, さえ, しか, すら, は, も, ぞ -助詞-係助詞 -# -# particle-adverbial: -# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, -# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, -# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, -# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, -# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) -助詞-副助詞 -# -# particle-interjective: particles with interjective grammatical roles. -# e.g. (松島) や -助詞-間投助詞 -# -# particle-coordinate: -# e.g. と, たり, だの, だり, とか, なり, や, やら -助詞-並立助詞 -# -# particle-final: -# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, -# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ -助詞-終助詞 -# -# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is -# adverbial, conjunctive, or sentence final. For example: -# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 -# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 -# 「(祈りが届いたせい) か (, 試験に合格した.)」 -# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 -# e.g. か -助詞-副助詞/並立助詞/終助詞 -# -# particle-adnominalizer: The "no" that attaches to nouns and modifies -# non-inflectional words. 
-助詞-連体化 -# -# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs -# that are giongo, giseigo, or gitaigo. -# e.g. に, と -助詞-副詞化 -# -# particle-special: A particle that does not fit into one of the above classifications. -# This includes particles that are used in Tanka, Haiku, and other poetry. -# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) -助詞-特殊 -# -##### -# auxiliary-verb: -助動詞 -# -##### -# interjection: Greetings and other exclamations. -# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, -# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい -#感動詞 -# -##### -# symbol: unclassified Symbols. -記号 -# -# symbol-misc: A general symbol not in one of the categories below. -# e.g. [○◎@$〒→+] -記号-一般 -# -# symbol-comma: Commas -# e.g. [,、] -記号-読点 -# -# symbol-period: Periods and full stops. -# e.g. [..。] -記号-句点 -# -# symbol-space: Full-width whitespace. -記号-空白 -# -# symbol-open_bracket: -# e.g. [({‘“『【] -記号-括弧開 -# -# symbol-close_bracket: -# e.g. [)}’”』」】] -記号-括弧閉 -# -# symbol-alphabetic: -#記号-アルファベット -# -##### -# other: unclassified other -#その他 -# -# other-interjection: Words that are hard to classify as noun-suffixes or -# sentence-final particles. -# e.g. (だ)ァ -その他-間投 -# -##### -# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. -# e.g. あの, うんと, えと -フィラー -# -##### -# non-verbal: non-verbal sound. -非言語音 -# -##### -# fragment: -#語断片 -# -##### -# unknown: unknown part of speech. -#未知語 -# -##### End of file diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ar.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ar.txt deleted file mode 100644 index 046829db..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ar.txt +++ /dev/null @@ -1,125 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. 
-# See http://members.unine.ch/jacques.savoy/clef/index.html. -# Also see http://www.opensource.org/licenses/bsd-license.html -# Cleaned on October 11, 2009 (not normalized, so use before normalization) -# This means that when modifying this list, you might need to add some -# redundant entries, for example containing forms with both أ and ا -من -ومن -منها -منه -في -وفي -فيها -فيه -و -ف -ثم -او -أو -ب -بها -به -ا -أ -اى -اي -أي -أى -لا -ولا -الا -ألا -إلا -لكن -ما -وما -كما -فما -عن -مع -اذا -إذا -ان -أن -إن -انها -أنها -إنها -انه -أنه -إنه -بان -بأن -فان -فأن -وان -وأن -وإن -التى -التي -الذى -الذي -الذين -الى -الي -إلى -إلي -على -عليها -عليه -اما -أما -إما -ايضا -أيضا -كل -وكل -لم -ولم -لن -ولن -هى -هي -هو -وهى -وهي -وهو -فهى -فهي -فهو -انت -أنت -لك -لها -له -هذه -هذا -تلك -ذلك -هناك -كانت -كان -يكون -تكون -وكانت -وكان -غير -بعض -قد -نحو -بين -بينما -منذ -ضمن -حيث -الان -الآن -خلال -بعد -قبل -حتى -عند -عندما -لدى -جميع diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_bg.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_bg.txt deleted file mode 100644 index 1ae4ba2a..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_bg.txt +++ /dev/null @@ -1,193 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -а -аз -ако -ала -бе -без -беше -би -бил -била -били -било -близо -бъдат -бъде -бяха -в -вас -ваш -ваша -вероятно -вече -взема -ви -вие -винаги -все -всеки -всички -всичко -всяка -във -въпреки -върху -г -ги -главно -го -д -да -дали -до -докато -докога -дори -досега -доста -е -едва -един -ето -за -зад -заедно -заради -засега -затова -защо -защото -и -из -или -им -има -имат -иска -й -каза -как -каква -какво -както -какъв -като -кога -когато -което -които -кой -който -колко -която -къде -където -към -ли -м -ме -между -мен -ми -мнозина -мога -могат -може -моля -момента -му -н -на -над -назад -най -направи -напред -например -нас -не -него -нея -ни -ние -никой -нито -но -някои -някой -няма -обаче -около -освен -особено -от -отгоре -отново -още -пак -по -повече -повечето -под -поне -поради -после -почти -прави -пред -преди -през -при -пък -първо -с -са -само -се -сега -си -скоро -след -сме -според -сред -срещу -сте -съм -със -също -т -тази -така -такива -такъв -там -твой -те -тези -ти -тн -то -това -тогава -този -той -толкова -точно -трябва -тук -тъй -тя -тях -у -харесва -ч -че -често -чрез -ще -щом -я diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ca.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ca.txt deleted file mode 100644 index 3da65dea..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ca.txt +++ /dev/null @@ -1,220 +0,0 @@ -# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) -a -abans -ací -ah -així -això -al -als -aleshores -algun -alguna -algunes -alguns -alhora -allà -allí -allò -altra -altre -altres -amb -ambdós -ambdues -apa -aquell -aquella -aquelles -aquells -aquest -aquesta -aquestes -aquests -aquí -baix -cada -cadascú -cadascuna -cadascunes -cadascuns -com -contra -d'un -d'una -d'unes 
-d'uns -dalt -de -del -dels -des -després -dins -dintre -donat -doncs -durant -e -eh -el -els -em -en -encara -ens -entre -érem -eren -éreu -es -és -esta -està -estàvem -estaven -estàveu -esteu -et -etc -ets -fins -fora -gairebé -ha -han -has -havia -he -hem -heu -hi -ho -i -igual -iguals -ja -l'hi -la -les -li -li'n -llavors -m'he -ma -mal -malgrat -mateix -mateixa -mateixes -mateixos -me -mentre -més -meu -meus -meva -meves -molt -molta -moltes -molts -mon -mons -n'he -n'hi -ne -ni -no -nogensmenys -només -nosaltres -nostra -nostre -nostres -o -oh -oi -on -pas -pel -pels -per -però -perquè -poc -poca -pocs -poques -potser -propi -qual -quals -quan -quant -que -què -quelcom -qui -quin -quina -quines -quins -s'ha -s'han -sa -semblant -semblants -ses -seu -seus -seva -seva -seves -si -sobre -sobretot -sóc -solament -sols -son -són -sons -sota -sou -t'ha -t'han -t'he -ta -tal -també -tampoc -tan -tant -tanta -tantes -teu -teus -teva -teves -ton -tons -tot -tota -totes -tots -un -una -unes -uns -us -va -vaig -vam -van -vas -veu -vosaltres -vostra -vostre -vostres diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ckb.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ckb.txt deleted file mode 100644 index 87abf118..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ckb.txt +++ /dev/null @@ -1,136 +0,0 @@ -# set of kurdish stopwords -# note these have been normalized with our scheme (e represented with U+06D5, etc) -# constructed from: -# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al) -# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston) -# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc - -# and -و -# which -کە -# of -ی -# made/did -کرد -# that/which -ئەوەی -# on/head -سەر -# two -دوو -# also -هەروەها -# from/that 
-لەو -# makes/does -دەکات -# some -چەند -# every -هەر - -# demonstratives -# that -ئەو -# this -ئەم - -# personal pronouns -# I -من -# we -ئێمە -# you -تۆ -# you -ئێوە -# he/she/it -ئەو -# they -ئەوان - -# prepositions -# to/with/by -بە -پێ -# without -بەبێ -# along with/while/during -بەدەم -# in the opinion of -بەلای -# according to -بەپێی -# before -بەرلە -# in the direction of -بەرەوی -# in front of/toward -بەرەوە -# before/in the face of -بەردەم -# without -بێ -# except for -بێجگە -# for -بۆ -# on/in -دە -تێ -# with -دەگەڵ -# after -دوای -# except for/aside from -جگە -# in/from -لە -لێ -# in front of/before/because of -لەبەر -# between/among -لەبەینی -# concerning/about -لەبابەت -# concerning -لەبارەی -# instead of -لەباتی -# beside -لەبن -# instead of -لەبرێتی -# behind -لەدەم -# with/together with -لەگەڵ -# by -لەلایەن -# within -لەناو -# between/among -لەنێو -# for the sake of -لەپێناوی -# with respect to -لەرەوی -# by means of/for -لەرێ -# for the sake of -لەرێگا -# on/on top of/according to -لەسەر -# under -لەژێر -# between/among -ناو -# between/among -نێوان -# after -پاش -# before -پێش -# like -وەک diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_cz.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_cz.txt deleted file mode 100644 index 53c6097d..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_cz.txt +++ /dev/null @@ -1,172 +0,0 @@ -a -s -k -o -i -u -v -z -dnes -cz -tímto -budeš -budem -byli -jseš -můj -svým -ta -tomto -tohle -tuto -tyto -jej -zda -proč -máte -tato -kam -tohoto -kdo -kteří -mi -nám -tom -tomuto -mít -nic -proto -kterou -byla -toho -protože -asi -ho -naši -napište -re -což -tím -takže -svých -její -svými -jste -aj -tu -tedy -teto -bylo -kde -ke -pravé -ji -nad -nejsou -či -pod -téma -mezi -přes -ty -pak -vám -ani -když -však -neg -jsem -tento -článku -články -aby -jsme 
-před -pta -jejich -byl -ještě -až -bez -také -pouze -první -vaše -která -nás -nový -tipy -pokud -může -strana -jeho -své -jiné -zprávy -nové -není -vás -jen -podle -zde -už -být -více -bude -již -než -který -by -které -co -nebo -ten -tak -má -při -od -po -jsou -jak -další -ale -si -se -ve -to -jako -za -zpět -ze -do -pro -je -na -atd -atp -jakmile -přičemž -já -on -ona -ono -oni -ony -my -vy -jí -ji -mě -mne -jemu -tomu -těm -těmu -němu -němuž -jehož -jíž -jelikož -jež -jakož -načež diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_da.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_da.txt deleted file mode 100644 index 42e6145b..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_da.txt +++ /dev/null @@ -1,110 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Danish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - -og | and -i | in -jeg | I -det | that (dem. pronoun)/it (pers. pronoun) -at | that (in front of a sentence)/to (with infinitive) -en | a/an -den | it (pers. pronoun)/that (dem. 
pronoun) -til | to/at/for/until/against/by/of/into, more -er | present tense of "to be" -som | who, as -på | on/upon/in/on/at/to/after/of/with/for, on -de | they -med | with/by/in, along -han | he -af | of/by/from/off/for/in/with/on, off -for | at/for/to/from/by/of/ago, in front/before, because -ikke | not -der | who/which, there/those -var | past tense of "to be" -mig | me/myself -sig | oneself/himself/herself/itself/themselves -men | but -et | a/an/one, one (number), someone/somebody/one -har | present tense of "to have" -om | round/about/for/in/a, about/around/down, if -vi | we -min | my -havde | past tense of "to have" -ham | him -hun | she -nu | now -over | over/above/across/by/beyond/past/on/about, over/past -da | then, when/as/since -fra | from/off/since, off, since -du | you -ud | out -sin | his/her/its/one's -dem | them -os | us/ourselves -op | up -man | you/one -hans | his -hvor | where -eller | or -hvad | what -skal | must/shall etc. -selv | myself/youself/herself/ourselves etc., even -her | here -alle | all/everyone/everybody etc. 
-vil | will (verb) -blev | past tense of "to stay/to remain/to get/to become" -kunne | could -ind | in -når | when -være | present tense of "to be" -dog | however/yet/after all -noget | something -ville | would -jo | you know/you see (adv), yes -deres | their/theirs -efter | after/behind/according to/for/by/from, later/afterwards -ned | down -skulle | should -denne | this -end | than -dette | this -mit | my/mine -også | also -under | under/beneath/below/during, below/underneath -have | have -dig | you -anden | other -hende | her -mine | my -alt | everything -meget | much/very, plenty of -sit | his, her, its, one's -sine | his, her, its, one's -vor | our -mod | against -disse | these -hvis | if -din | your/yours -nogle | some -hos | by/at -blive | be/become -mange | many -ad | by/through -bliver | present tense of "to be/to become" -hendes | her/hers -været | be -thi | for (conj) -jer | you -sådan | such, like this/like that diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_de.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_de.txt deleted file mode 100644 index 86525e7a..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_de.txt +++ /dev/null @@ -1,294 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A German stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | The number of forms in this list is reduced significantly by passing it - | through the German stemmer. 
- - -aber | but - -alle | all -allem -allen -aller -alles - -als | than, as -also | so -am | an + dem -an | at - -ander | other -andere -anderem -anderen -anderer -anderes -anderm -andern -anderr -anders - -auch | also -auf | on -aus | out of -bei | by -bin | am -bis | until -bist | art -da | there -damit | with it -dann | then - -der | the -den -des -dem -die -das - -daß | that - -derselbe | the same -derselben -denselben -desselben -demselben -dieselbe -dieselben -dasselbe - -dazu | to that - -dein | thy -deine -deinem -deinen -deiner -deines - -denn | because - -derer | of those -dessen | of him - -dich | thee -dir | to thee -du | thou - -dies | this -diese -diesem -diesen -dieser -dieses - - -doch | (several meanings) -dort | (over) there - - -durch | through - -ein | a -eine -einem -einen -einer -eines - -einig | some -einige -einigem -einigen -einiger -einiges - -einmal | once - -er | he -ihn | him -ihm | to him - -es | it -etwas | something - -euer | your -eure -eurem -euren -eurer -eures - -für | for -gegen | towards -gewesen | p.p. 
of sein -hab | have -habe | have -haben | have -hat | has -hatte | had -hatten | had -hier | here -hin | there -hinter | behind - -ich | I -mich | me -mir | to me - - -ihr | you, to her -ihre -ihrem -ihren -ihrer -ihres -euch | to you - -im | in + dem -in | in -indem | while -ins | in + das -ist | is - -jede | each, every -jedem -jeden -jeder -jedes - -jene | that -jenem -jenen -jener -jenes - -jetzt | now -kann | can - -kein | no -keine -keinem -keinen -keiner -keines - -können | can -könnte | could -machen | do -man | one - -manche | some, many a -manchem -manchen -mancher -manches - -mein | my -meine -meinem -meinen -meiner -meines - -mit | with -muss | must -musste | had to -nach | to(wards) -nicht | not -nichts | nothing -noch | still, yet -nun | now -nur | only -ob | whether -oder | or -ohne | without -sehr | very - -sein | his -seine -seinem -seinen -seiner -seines - -selbst | self -sich | herself - -sie | they, she -ihnen | to them - -sind | are -so | so - -solche | such -solchem -solchen -solcher -solches - -soll | shall -sollte | should -sondern | but -sonst | else -über | over -um | about, around -und | and - -uns | us -unse -unsem -unsen -unser -unses - -unter | under -viel | much -vom | von + dem -von | from -vor | before -während | while -war | was -waren | were -warst | wast -was | what -weg | away, off -weil | because -weiter | further - -welche | which -welchem -welchen -welcher -welches - -wenn | when -werde | will -werden | will -wie | how -wieder | again -will | want -wir | we -wird | will -wirst | willst -wo | where -wollen | want -wollte | wanted -würde | would -würden | would -zu | to -zum | zu + dem -zur | zu + der -zwar | indeed -zwischen | between - diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_el.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_el.txt deleted file mode 100644 index 232681f5..00000000 --- 
a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_el.txt +++ /dev/null @@ -1,78 +0,0 @@ -# Lucene Greek Stopwords list -# Note: by default this file is used after GreekLowerCaseFilter, -# so when modifying this file use 'σ' instead of 'ς' -ο -η -το -οι -τα -του -τησ -των -τον -την -και -κι -κ -ειμαι -εισαι -ειναι -ειμαστε -ειστε -στο -στον -στη -στην -μα -αλλα -απο -για -προσ -με -σε -ωσ -παρα -αντι -κατα -μετα -θα -να -δε -δεν -μη -μην -επι -ενω -εαν -αν -τοτε -που -πωσ -ποιοσ -ποια -ποιο -ποιοι -ποιεσ -ποιων -ποιουσ -αυτοσ -αυτη -αυτο -αυτοι -αυτων -αυτουσ -αυτεσ -αυτα -εκεινοσ -εκεινη -εκεινο -εκεινοι -εκεινεσ -εκεινα -εκεινων -εκεινουσ -οπωσ -ομωσ -ισωσ -οσο -οτι diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_en.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_en.txt deleted file mode 100644 index 2c164c0b..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_en.txt +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -# Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -such -that -the -their -then -there -these -they -this -to -was -will -with diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_es.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_es.txt deleted file mode 100644 index 487d78c8..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_es.txt +++ /dev/null @@ -1,356 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Spanish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. 
- -de | from, of -la | the, her -que | who, that -el | the -en | in -y | and -a | to -los | the, them -del | de + el -se | himself, from him etc -las | the, them -por | for, by, etc -un | a -para | for -con | with -no | no -una | a -su | his, her -al | a + el - | es from SER -lo | him -como | how -más | more -pero | pero -sus | su plural -le | to him, her -ya | already -o | or - | fue from SER -este | this - | ha from HABER -sí | himself etc -porque | because -esta | this - | son from SER -entre | between - | está from ESTAR -cuando | when -muy | very -sin | without -sobre | on - | ser from SER - | tiene from TENER -también | also -me | me -hasta | until -hay | there is/are -donde | where - | han from HABER -quien | whom, that - | están from ESTAR - | estado from ESTAR -desde | from -todo | all -nos | us -durante | during - | estados from ESTAR -todos | all -uno | a -les | to them -ni | nor -contra | against -otros | other - | fueron from SER -ese | that -eso | that - | había from HABER -ante | before -ellos | they -e | and (variant of y) -esto | this -mí | me -antes | before -algunos | some -qué | what? 
-unos | a -yo | I -otro | other -otras | other -otra | other -él | he -tanto | so much, many -esa | that -estos | these -mucho | much, many -quienes | who -nada | nothing -muchos | many -cual | who - | sea from SER -poco | few -ella | she -estar | to be - | haber from HABER -estas | these - | estaba from ESTAR - | estamos from ESTAR -algunas | some -algo | something -nosotros | we - - | other forms - -mi | me -mis | mi plural -tú | thou -te | thee -ti | thee -tu | thy -tus | tu plural -ellas | they -nosotras | we -vosotros | you -vosotras | you -os | you -mío | mine -mía | -míos | -mías | -tuyo | thine -tuya | -tuyos | -tuyas | -suyo | his, hers, theirs -suya | -suyos | -suyas | -nuestro | ours -nuestra | -nuestros | -nuestras | -vuestro | yours -vuestra | -vuestros | -vuestras | -esos | those -esas | those - - | forms of estar, to be (not including the infinitive): -estoy -estás -está -estamos -estáis -están -esté -estés -estemos -estéis -estén -estaré -estarás -estará -estaremos -estaréis -estarán -estaría -estarías -estaríamos -estaríais -estarían -estaba -estabas -estábamos -estabais -estaban -estuve -estuviste -estuvo -estuvimos -estuvisteis -estuvieron -estuviera -estuvieras -estuviéramos -estuvierais -estuvieran -estuviese -estuvieses -estuviésemos -estuvieseis -estuviesen -estando -estado -estada -estados -estadas -estad - - | forms of haber, to have (not including the infinitive): -he -has -ha -hemos -habéis -han -haya -hayas -hayamos -hayáis -hayan -habré -habrás -habrá -habremos -habréis -habrán -habría -habrías -habríamos -habríais -habrían -había -habías -habíamos -habíais -habían -hube -hubiste -hubo -hubimos -hubisteis -hubieron -hubiera -hubieras -hubiéramos -hubierais -hubieran -hubiese -hubieses -hubiésemos -hubieseis -hubiesen -habiendo -habido -habida -habidos -habidas - - | forms of ser, to be (not including the infinitive): -soy -eres -es -somos -sois -son -sea -seas -seamos -seáis -sean -seré -serás -será -seremos -seréis -serán -sería 
-serías -seríamos -seríais -serían -era -eras -éramos -erais -eran -fui -fuiste -fue -fuimos -fuisteis -fueron -fuera -fueras -fuéramos -fuerais -fueran -fuese -fueses -fuésemos -fueseis -fuesen -siendo -sido - | sed also means 'thirst' - - | forms of tener, to have (not including the infinitive): -tengo -tienes -tiene -tenemos -tenéis -tienen -tenga -tengas -tengamos -tengáis -tengan -tendré -tendrás -tendrá -tendremos -tendréis -tendrán -tendría -tendrías -tendríamos -tendríais -tendrían -tenía -tenías -teníamos -teníais -tenían -tuve -tuviste -tuvo -tuvimos -tuvisteis -tuvieron -tuviera -tuvieras -tuviéramos -tuvierais -tuvieran -tuviese -tuvieses -tuviésemos -tuvieseis -tuviesen -teniendo -tenido -tenida -tenidos -tenidas -tened - diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_et.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_et.txt deleted file mode 100644 index 1b06a134..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_et.txt +++ /dev/null @@ -1,1603 +0,0 @@ -# Estonian stopwords list -all -alla -allapoole -allpool -alt -altpoolt -eel -eespool -enne -hommikupoole -hoolimata -ilma -kaudu -keset -kesk -kohe -koos -kuhupoole -kuni -kuspool -kustpoolt -kõige -käsikäes -lappi -ligi -läbi -mööda -paitsi -peale -pealepoole -pealpool -pealt -pealtpoolt -piki -pikku -piku -pikuti -põiki -pärast -päri -risti -sealpool -sealtpoolt -seespool -seltsis -siiapoole -siinpool -siitpoolt -sinnapoole -sissepoole -taga -tagantpoolt -tagapidi -tagapool -taha -tahapoole -teispool -teispoole -tänu -tükkis -vaatamata -vastu -väljapoole -väljaspool -väljastpoolt -õhtupoole -ühes -ühestükis -ühestükkis -ülalpool -ülaltpoolt -üle -ülespoole -ülevalpool -ülevaltpoolt -ümber -ümbert -aegu -aegus -alguks -algul -algule -algult -alguni -all -alla -alt -alul -alutsi -arvel -asemel -asemele -eel -eeli -ees -eesotsas 
-eest -eestotsast -esitsi -ette -etteotsa -haaval -heaks -hoolimata -hulgas -hulgast -hulka -jalgu -jalus -jalust -jaoks -jooksul -juurde -juures -juurest -jälil -jälile -järel -järele -järelt -järgi -kaasas -kallal -kallale -kallalt -kamul -kannul -kannule -kannult -kaudu -kaupa -keskel -keskele -keskelt -keskis -keskpaiku -kestel -kestes -kilda -killas -killast -kimpu -kimpus -kiuste -kohal -kohale -kohalt -kohaselt -kohe -kohta -koos -korral -kukil -kukile -kukilt -kulul -kõrva -kõrval -kõrvale -kõrvalt -kõrvas -kõrvast -käekõrval -käekõrvale -käekõrvalt -käes -käest -kätte -külge -küljes -küljest -küüsi -küüsis -küüsist -ligi -ligidal -ligidale -ligidalt -aegu -aegus -alguks -algul -algule -algult -alguni -all -alla -alt -alul -alutsi -arvel -asemel -asemele -eel -eeli -ees -eesotsas -eest -eestotsast -esitsi -ette -etteotsa -haaval -heaks -hoolimata -hulgas -hulgast -hulka -jalgu -jalus -jalust -jaoks -jooksul -juurde -juures -juurest -jälil -jälile -järel -järele -järelt -järgi -kaasas -kallal -kallale -kallalt -kamul -kannul -kannule -kannult -kaudu -kaupa -keskel -keskele -keskelt -keskis -keskpaiku -kestel -kestes -kilda -killas -killast -kimpu -kimpus -kiuste -kohal -kohale -kohalt -kohaselt -kohe -kohta -koos -korral -kukil -kukile -kukilt -kulul -kõrva -kõrval -kõrvale -kõrvalt -kõrvas -kõrvast -käekõrval -käekõrvale -käekõrvalt -käes -käest -kätte -külge -küljes -küljest -küüsi -küüsis -küüsist -ligi -ligidal -ligidale -ligidalt -lool -läbi -lähedal -lähedale -lähedalt -man -mant -manu -meelest -mööda -nahas -nahka -nahkas -najal -najale -najalt -nõjal -nõjale -otsa -otsas -otsast -paigale -paigu -paiku -peal -peale -pealt -perra -perrä -pidi -pihta -piki -pikku -pool -poole -poolest -poolt -puhul -puksiiris -pähe -päralt -päras -pärast -päri -ringi -ringis -risust -saadetusel -saadik -saatel -saati -seas -seast -sees -seest -sekka -seljataga -seltsi -seltsis -seltsist -sisse -slepis -suhtes -šlepis -taga -tagant -tagantotsast -tagaotsas -tagaselja 
-tagasi -tagast -tagutsi -taha -tahaotsa -takka -tarvis -tasa -tuuri -tuuris -tõttu -tükkis -uhal -vaatamata -vahel -vahele -vahelt -vahepeal -vahepeale -vahepealt -vahetsi -varal -varale -varul -vastas -vastast -vastu -veerde -veeres -viisi -võidu -võrd -võrdki -võrra -võrragi -väel -väele -vältel -väärt -väärtki -äärde -ääre -ääres -äärest -ühes -üle -ümber -ümbert -a -abil -aina -ainult -alalt -alates -alati -alles -b -c -d -e -eales -ealeski -edasi -edaspidi -eelkõige -eemal -ei -eks -end -enda -enese -ennem -esialgu -f -g -h -hoopis -i -iganes -igatahes -igati -iial -iialgi -ikka -ikkagi -ilmaski -iseenda -iseenese -iseenesest -isegi -j -jah -ju -juba -juhul -just -järelikult -k -ka -kah -kas -kasvõi -keda -kestahes -kogu -koguni -kohati -kokku -kuhu -kuhugi -kuidagi -kuidas -kunagi -kus -kusagil -kusjuures -kuskil -kust -kõigepealt -küll -l -liiga -lisaks -m -miks -mil -millal -millalgi -mispärast -mistahes -mistõttu -mitte -muide -muidu -muidugi -muist -mujal -mujale -mujalt -mõlemad -mõnda -mõne -mõnikord -n -nii -niikaua -niimoodi -niipaljuke -niisama -niisiis -niivõrd -nõnda -nüüd -o -omaette -omakorda -omavahel -ometi -p -palju -paljuke -palju-palju -peaaegu -peagi -peamiselt -pigem -pisut -praegu -päris -r -rohkem -s -samas -samuti -seal -sealt -sedakorda -sedapuhku -seega -seejuures -seejärel -seekord -seepärast -seetõttu -sellepärast -seni -sestap -siia -siiani -siin -siinkohal -siis -siiski -siit -sinna -suht -š -z -ž -t -teel -teineteise -tõesti -täiesti -u -umbes -v -w -veel -veelgi -vist -võibolla -võib-olla -väga -vähemalt -välja -väljas -väljast -õ -ä -ära -ö -ü -ühtlasi -üksi -ükskõik -ülal -ülale -ülalt -üles -ülesse -üleval -ülevalt -ülimalt -üsna -x -y -aga -ega -ehk -ehkki -elik -ellik -enge -ennegu -ent -et -ja -justkui -kui -kuid -kuigi -kuivõrd -kuna -kuni -kut -mistab -muudkui -nagu -nigu -ning -olgugi -otsekui -otsenagu -selmet -sest -sestab -vaid -või -aa -adaa -adjöö -ae -ah -ahaa -ahah -ah-ah-ah -ah-haa -ahoi -ai -aidaa -aidu-raidu 
-aih -aijeh -aituma -aitäh -aitüma -ammuu -amps -ampsti -aptsih -ass -at -ata -at-at-at -atsih -atsihh -auh -bai-bai -bingo -braavo -brr -ee -eeh -eh -ehee -eheh -eh-eh-hee -eh-eh-ee -ehei -ehh -ehhee -einoh -ena -ennäe -ennäh -fuh -fui -fuih -haa -hah -hahaa -hah-hah-hah -halleluuja -hallo -halloo -hass -hee -heh -he-he-hee -hei -heldeke(ne) -heureka -hihii -hip-hip-hurraa -hmh -hmjah -hoh-hoh-hoo -hohoo -hoi -hollallaa -hoo -hoplaa -hopp -hops -hopsassaa -hopsti -hosianna -huh -huidii -huist -hurjah -hurjeh -hurjoh -hurjuh -hurraa -huu -hõhõh -hõi -hõissa -hõissassa -hõk -hõkk -häh -hä-hä-hää -hüvasti -ih-ah-haa -ih-ih-hii -ii-ha-ha -issake -issakene -isver -jaa-ah -ja-ah -jaah -janäe -jeeh -jeerum -jeever -jessas -jestas -juhhei -jumalaga -jumalime -jumaluke -jumalukene -jutas -kaaps -kaapsti -kaasike -kae -kalps -kalpsti -kannäe -kanäe -kappadi -kaps -kapsti -karkõmm -karkäuh -karkääks -karkääksti -karmauh -karmauhti -karnaps -karnapsti -karniuhti -karpartsaki -karpauh -karpauhti -karplauh -karplauhti -karprauh -karprauhti -karsumdi -karsumm -kartsumdi -kartsumm -karviuh -karviuhti -kaske -kassa -kauh -kauhti -keh -keksti -kepsti -khe -khm -kih -kiiks -kiiksti -kiis -kiiss -kikerii -kikerikii -kili -kilk -kilk-kõlk -kilks -kilks-kolks -kilks-kõlks -kill -killadi -killadi|-kolladi -killadi-kõlladi -killa-kolla -killa-kõlla -kill-kõll -kimps-komps -kipp -kips-kõps -kiriküüt -kirra-kõrra -kirr-kõrr -kirts -klaps -klapsti -klirdi -klirr -klonks -klops -klopsti -kluk -klu-kluu -klõks -klõksti -klõmdi -klõmm -klõmpsti -klõnks -klõnksti -klõps -klõpsti -kläu -kohva-kohva -kok -koks -koksti -kolaki -kolk -kolks -kolksti -koll -kolladi -komp -komps -kompsti -kop -kopp -koppadi -kops -kopsti -kossu -kotsu -kraa -kraak -kraaks -kraaps -kraapsti -krahh -kraks -kraksti -kraps -krapsti -krauh -krauhti -kriiks -kriiksti -kriips -kriips-kraaps -kripa-krõpa -krips-kraps -kriuh -kriuks -kriuksti -kromps -kronk -kronks -krooks -kruu -krõks -krõksti -krõpa -krõps -krõpsti -krõuh 
-kräu -kräuh -kräuhti -kräuks -kss -kukeleegu -kukku -kuku -kulu -kurluu -kurnäu -kuss -kussu -kõks -kõksti -kõldi -kõlks -kõlksti -kõll -kõmaki -kõmdi -kõmm -kõmps -kõpp -kõps -kõpsadi -kõpsat -kõpsti -kõrr -kõrra-kõrra -kõss -kõtt -kõõksti -kärr -kärts -kärtsti -käuks -käuksti -kääga -kääks -kääksti -köh -köki-möki -köksti -laks -laksti -lampsti -larts -lartsti -lats -latsti -leelo -legoo -lehva -liiri-lõõri -lika-lõka -likat-lõkat -limpsti -lips -lipsti -lirts -lirtsaki -lirtsti -lonksti -lops -lopsti -lorts -lortsti -luks -lups -lupsti -lurts -lurtsti -lõks -lõksti -lõmps -lõmpsti -lõnks -lõnksti -lärts -lärtsti -läts -lätsti -lörts -lörtsti -lötsti -lööps -lööpsti -marss -mats -matsti -mauh -mauhti -mh -mhh -mhmh -miau -mjaa -mkm -m-mh -mnjaa -mnjah -moens -mulks -mulksti -mull-mull -mull-mull-mull -muu -muuh -mõh -mõmm -mäh -mäts -mäu -mää -möh -möh-öh-ää -möö -müh-müh -mühüh -müks -müksti -müraki -mürr -mürts -mürtsaki -mürtsti -mütaku -müta-mäta -müta-müta -müt-müt -müt-müt-müt -müts -mütsti -mütt -naa -naah -nah -naks -naksti -nanuu -naps -napsti -nilpsti -nipsti -nirr -niuh -niuh-näuh -niuhti -noh -noksti -nolpsti -nonoh -nonoo -nonäh -noo -nooh -nooks -norr -nurr -nuuts -nõh -nõhh -nõka-nõka -nõks -nõksat-nõksat -nõks-nõks -nõksti -nõõ -nõõh -näeh -näh -nälpsti -nämm-nämm -näpsti -näts -nätsti -näu -näuh -näuhti -näuks -näuksti -nääh -nääks -nühkat-nühkat -oeh -oh -ohh -ohhh -oh-hoi -oh-hoo -ohoh -oh-oh-oo -oh-oh-hoo -ohoi -ohoo -oi -oih -oijee -oijeh -oo -ooh -oo-oh -oo-ohh -oot -ossa -ot -paa -pah -pahh -pakaa -pamm -pantsti -pardon -pardonks -parlartsti -parts -partsti -partsumdi -partsumm -pastoi -pats -patst -patsti -pau -pauh -pauhti -pele -pfui -phuh -phuuh -phäh -phähh -piiks -piip -piiri-pääri -pimm -pimm-pamm -pimm-pomm -pimm-põmm -piraki -piuks -piu-pau -plaks -plaksti -plarts -plartsti -plats -platsti -plauh -plauhh -plauhti -pliks -pliks-plaks -plinn -pliraki -plirts -plirtsti -pliu -pliuh -ploks -plotsti -plumps -plumpsti -plõks -plõksti 
-plõmdi -plõmm -plõnn -plärr -plärts -plärtsat -plärtsti -pläu -pläuh -plää -plörtsat -pomm -popp -pops -popsti -ports -pot -pots -potsti -pott -praks -praksti -prants -prantsaki -prantsti -prassai -prauh -prauhh -prauhti -priks -priuh -priuhh -priuh-prauh -proosit -proost -prr -prrr -prõks -prõksti -prõmdi -prõmm -prõntsti -prääk -prääks -pst -psst -ptrr -ptruu -ptüi -puh -puhh -puksti -pumm -pumps -pup-pup-pup -purts -puuh -põks -põksti -põmdi -põmm -põmmadi -põnks -põnn -põnnadi -põnt -põnts -põntsti -põraki -põrr -põrra-põrra -päh -pähh -päntsti -pää -pöörd -püh -raks -raksti -raps -rapsti -ratataa -rauh -riips -riipsti -riks -riks-raks -rips-raps -rivitult -robaki -rops -ropsaki -ropsti -ruik -räntsti -räts -röh -röhh -sah -sahh -sahkat -saps -sapsti -sauh -sauhti -servus -sihkadi-sahkadi -sihka-sahka -sihkat-sahkat -silks -silk-solk -sips -sipsti -sirr -sirr-sorr -sirts -sirtsti -siu -siuh -siuh-sauh -siuh-säuh -siuhti -siuks -siuts -skool -so -soh -solks -solksti -solpsti -soo -sooh -so-oh -soo-oh -sopp -sops -sopsti -sorr -sorts -sortsti -so-soo -soss -soss-soss -ss -sss -sst -stopp -suhkat-sahkat -sulk -sulks -sulksti -sull -sulla-sulla -sulpa-sulpa -sulps -sulpsti -sumaki -sumdi -summ -summat-summat -sups -supsaku -supsti -surts -surtsti -suss -susti -suts -sutsti -säh -sähke -särts -särtsti -säu -säuh -säuhti -taevake -taevakene -takk -tere -terekest -tibi-tibi -tikk-takk -tiks -tilk -tilks -till -tilla-talla -till-tall -tilulii -tinn -tip -tip-tap -tirr -tirtsti -tiu -tjaa -tjah -tohhoh -tohhoo -tohoh -tohoo -tok -tokk -toks -toksti -tonks -tonksti -tota -totsti -tot-tot -tprr -tpruu -trah -trahh -trallallaa -trill -trillallaa -trr -trrr -tsah -tsahh -tsilk -tsilk-tsolk -tsirr -tsiuh -tskae -tsolk -tss -tst -tsst -tsuhh -tsuk -tsumm -tsurr -tsäuh -tšao -tšš -tššš -tuk -tuks -turts -turtsti -tutki -tutkit -tutu-lutu -tutulutu -tuut -tuutu-luutu -tõks -tötsti -tümps -uh -uhh -uh-huu -uhtsa -uhtsaa -uhuh -uhuu -ui -uih -uih-aih -uijah -uijeh -uist -uit 
-uka -upsti -uraa -urjah -urjeh -urjoh -urjuh -urr -urraa -ust -utu -uu -uuh -vaak -vaat -vae -vaeh -vai -vat -vau -vhüüt -vidiit -viiks -vilks -vilksti -vinki-vinki -virdi -virr -viu -viudi -viuh -viuhti -voeh -voh -vohh -volks -volksti -vooh -vops -vopsti -vot -vuh -vuhti -vuih -vulks -vulksti -vull -vulpsti -vups -vupsaki -vupsaku -vupsti -vurdi -vurr -vurra-vurra -vurts -vurtsti -vutt -võe -võeh -või -võih -võrr -võts -võtt -vääks -õe -õits -õk -õkk -õrr -õss -õuh -äh -ähh -ähhähhää -äh-hää -äh-äh-hää -äiu -äiu-ää -äss -ää -ääh -äähh -öh -öhh -ök -üh -eelmine -eikeegi -eimiski -emb-kumb -enam -enim -iga -igasugune -igaüks -ise -isesugune -järgmine -keegi -kes -kumb -kumbki -kõik -meiesugune -meietaoline -midagi -mihuke -mihukene -milletaoline -milline -mina -minake -mingi -mingisugune -minusugune -minutaoline -mis -miski -miskisugune -missugune -misuke -mitmes -mitmesugune -mitu -mitu-mitu -mitu-setu -muu -mõlema -mõnesugune -mõni -mõningane -mõningas -mäherdune -määrane -naasugune -need -nemad -nendesugune -nendetaoline -nihuke -nihukene -niimitu -niisamasugune -niisugune -nisuke -nisukene -oma -omaenese -omasugune -omataoline -pool -praegune -sama -samasugune -samataoline -see -seesama -seesamane -seesamune -seesinane -seesugune -selline -sihuke -sihukene -sina -sinusugune -sinutaoline -siuke -siukene -säherdune -säärane -taoline -teiesugune -teine -teistsugune -tema -temake -temakene -temasugune -temataoline -too -toosama -toosamane -üks -üksteise -hakkama -minema -olema -pidama -saama -tegema -tulema -võima diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_eu.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_eu.txt deleted file mode 100644 index 25f1db93..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_eu.txt +++ /dev/null @@ -1,99 +0,0 @@ -# example set of basque stopwords -al -anitz 
-arabera -asko -baina -bat -batean -batek -bati -batzuei -batzuek -batzuetan -batzuk -bera -beraiek -berau -berauek -bere -berori -beroriek -beste -bezala -da -dago -dira -ditu -du -dute -edo -egin -ere -eta -eurak -ez -gainera -gu -gutxi -guzti -haiei -haiek -haietan -hainbeste -hala -han -handik -hango -hara -hari -hark -hartan -hau -hauei -hauek -hauetan -hemen -hemendik -hemengo -hi -hona -honek -honela -honetan -honi -hor -hori -horiei -horiek -horietan -horko -horra -horrek -horrela -horretan -horri -hortik -hura -izan -ni -noiz -nola -non -nondik -nongo -nor -nora -ze -zein -zen -zenbait -zenbat -zer -zergatik -ziren -zituen -zu -zuek -zuen -zuten diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_fa.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_fa.txt deleted file mode 100644 index 723641c6..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_fa.txt +++ /dev/null @@ -1,313 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -# Note: by default this file is used after normalization, so when adding entries -# to this file, use the arabic 'ي' instead of 'ی' -انان -نداشته -سراسر -خياه -ايشان -وي -تاكنون -بيشتري -دوم -پس -ناشي -وگو -يا -داشتند -سپس -هنگام -هرگز -پنج -نشان -امسال -ديگر -گروهي -شدند -چطور -ده -و -دو -نخستين -ولي -چرا -چه -وسط -ه -كدام -قابل -يك -رفت -هفت -همچنين -در -هزار -بله -بلي -شايد -اما -شناسي -گرفته -دهد -داشته -دانست -داشتن -خواهيم -ميليارد -وقتيكه -امد -خواهد -جز -اورده -شده -بلكه -خدمات -شدن -برخي -نبود -بسياري -جلوگيري -حق -كردند -نوعي -بعري -نكرده -نظير -نبايد -بوده -بودن -داد -اورد -هست -جايي -شود -دنبال -داده -بايد -سابق -هيچ -همان -انجا -كمتر -كجاست -گردد -كسي -تر -مردم -تان -دادن -بودند -سري -جدا -ندارند -مگر -يكديگر -دارد -دهند -بنابراين -هنگامي -سمت -جا -انچه -خود -دادند -زياد -دارند -اثر -بدون -بهترين -بيشتر -البته -به -براساس -بيرون -كرد -بعضي -گرفت -توي -اي -ميليون -او -جريان -تول -بر -مانند -برابر -باشيم -مدتي -گويند -اكنون -تا -تنها -جديد -چند -بي -نشده -كردن -كردم -گويد -كرده -كنيم -نمي -نزد -روي -قصد -فقط -بالاي -ديگران -اين -ديروز -توسط -سوم -ايم -دانند -سوي -استفاده -شما -كنار -داريم -ساخته -طور -امده -رفته -نخست -بيست -نزديك -طي -كنيد -از -انها -تمامي -داشت -يكي -طريق -اش -چيست -روب -نمايد -گفت -چندين -چيزي -تواند -ام -ايا -با -ان -ايد -ترين -اينكه -ديگري -راه -هايي -بروز -همچنان -پاعين -كس -حدود -مختلف -مقابل -چيز -گيرد -ندارد -ضد -همچون -سازي -شان -مورد -باره -مرسي -خويش -برخوردار -چون -خارج -شش -هنوز -تحت -ضمن -هستيم -گفته -فكر -بسيار -پيش -براي -روزهاي -انكه -نخواهد -بالا -كل -وقتي -كي -چنين -كه -گيري -نيست -است -كجا -كند -نيز -يابد -بندي -حتي -توانند -عقب -خواست -كنند -بين -تمام -همه -ما -باشند -مثل -شد -اري -باشد -اره -طبق -بعد -اگر -صورت -غير -جاي -بيش -ريزي -اند -زيرا -چگونه -بار -لطفا -مي -درباره -من -ديده -همين -گذاري -برداري -علت -گذاشته -هم -فوق -نه -ها -شوند -اباد -همواره -هر -اول -خواهند -چهار -نام -امروز -مان -هاي -قبل -كنم -سعي -تازه -را -هستند -زير -جلوي -عنوان -بود 
diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_fi.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_fi.txt deleted file mode 100644 index 4372c9a0..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_fi.txt +++ /dev/null @@ -1,97 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| forms of BE - -olla -olen -olet -on -olemme -olette -ovat -ole | negative form - -oli -olisi -olisit -olisin -olisimme -olisitte -olisivat -olit -olin -olimme -olitte -olivat -ollut -olleet - -en | negation -et -ei -emme -ette -eivät - -|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans -minä minun minut minua minussa minusta minuun minulla minulta minulle | I -sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you -hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she -me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we -te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you -he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they - -tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this -tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that -se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it -nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these -nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those -ne niiden niitä niissä niistä niihin niillä niiltä 
niille niinä niiksi | they - -kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who -ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) -mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what -mitkä | (pl) - -joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which -jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) - -| conjunctions - -että | that -ja | and -jos | if -koska | because -kuin | than -mutta | but -niin | so -sekä | and -sillä | for -tai | or -vaan | but -vai | or -vaikka | although - - -| prepositions - -kanssa | with -mukaan | according to -noin | about -poikki | across -yli | over, across - -| other - -kun | when -niin | so -nyt | now -itse | self - diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_fr.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_fr.txt deleted file mode 100644 index 749abae6..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_fr.txt +++ /dev/null @@ -1,186 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A French stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. 
- -au | a + le -aux | a + les -avec | with -ce | this -ces | these -dans | with -de | of -des | de + les -du | de + le -elle | she -en | `of them' etc -et | and -eux | them -il | he -je | I -la | the -le | the -leur | their -lui | him -ma | my (fem) -mais | but -me | me -même | same; as in moi-même (myself) etc -mes | me (pl) -moi | me -mon | my (masc) -ne | not -nos | our (pl) -notre | our -nous | we -on | one -ou | where -par | by -pas | not -pour | for -qu | que before vowel -que | that -qui | who -sa | his, her (fem) -se | oneself -ses | his (pl) -son | his, her (masc) -sur | on -ta | thy (fem) -te | thee -tes | thy (pl) -toi | thee -ton | thy (masc) -tu | thou -un | a -une | a -vos | your (pl) -votre | your -vous | you - - | single letter forms - -c | c' -d | d' -j | j' -l | l' -à | to, at -m | m' -n | n' -s | s' -t | t' -y | there - - | forms of être (not including the infinitive): -été -étée -étées -étés -étant -suis -es -est -sommes -êtes -sont -serai -seras -sera -serons -serez -seront -serais -serait -serions -seriez -seraient -étais -était -étions -étiez -étaient -fus -fut -fûmes -fûtes -furent -sois -soit -soyons -soyez -soient -fusse -fusses -fût -fussions -fussiez -fussent - - | forms of avoir (not including the infinitive): -ayant -eu -eue -eues -eus -ai -as -avons -avez -ont -aurai -auras -aura -aurons -aurez -auront -aurais -aurait -aurions -auriez -auraient -avais -avait -avions -aviez -avaient -eut -eûmes -eûtes -eurent -aie -aies -ait -ayons -ayez -aient -eusse -eusses -eût -eussions -eussiez -eussent - - | Later additions (from Jean-Christophe Deschamps) -ceci | this -cela | that -celà | that -cet | this -cette | this -ici | here -ils | they -les | the (pl) -leurs | their (pl) -quel | which -quels | which -quelle | which -quelles | which -sans | without -soi | oneself - diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ga.txt 
b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ga.txt deleted file mode 100644 index 9ff88d74..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ga.txt +++ /dev/null @@ -1,110 +0,0 @@ - -a -ach -ag -agus -an -aon -ar -arna -as -b' -ba -beirt -bhúr -caoga -ceathair -ceathrar -chomh -chtó -chuig -chun -cois -céad -cúig -cúigear -d' -daichead -dar -de -deich -deichniúr -den -dhá -do -don -dtí -dá -dár -dó -faoi -faoin -faoina -faoinár -fara -fiche -gach -gan -go -gur -haon -hocht -i -iad -idir -in -ina -ins -inár -is -le -leis -lena -lenár -m' -mar -mo -mé -na -nach -naoi -naonúr -ná -ní -níor -nó -nócha -ocht -ochtar -os -roimh -sa -seacht -seachtar -seachtó -seasca -seisear -siad -sibh -sinn -sna -sé -sí -tar -thar -thú -triúr -trí -trína -trínár -tríocha -tú -um -ár -é -éis -í -ó -ón -óna -ónár diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_gl.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_gl.txt deleted file mode 100644 index d8760b12..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_gl.txt +++ /dev/null @@ -1,161 +0,0 @@ -# galican stopwords -a -aínda -alí -aquel -aquela -aquelas -aqueles -aquilo -aquí -ao -aos -as -así -á -ben -cando -che -co -coa -comigo -con -connosco -contigo -convosco -coas -cos -cun -cuns -cunha -cunhas -da -dalgunha -dalgunhas -dalgún -dalgúns -das -de -del -dela -delas -deles -desde -deste -do -dos -dun -duns -dunha -dunhas -e -el -ela -elas -eles -en -era -eran -esa -esas -ese -eses -esta -estar -estaba -está -están -este -estes -estiven -estou -eu -é -facer -foi -foron -fun -había -hai -iso -isto -la -las -lle -lles -lo -los -mais -me -meu -meus -min -miña -miñas -moi -na -nas -neste -nin -no -non -nos -nosa -nosas -noso -nosos -nós -nun -nunha -nuns 
-nunhas -o -os -ou -ó -ós -para -pero -pode -pois -pola -polas -polo -polos -por -que -se -senón -ser -seu -seus -sexa -sido -sobre -súa -súas -tamén -tan -te -ten -teñen -teño -ter -teu -teus -ti -tido -tiña -tiven -túa -túas -un -unha -unhas -uns -vos -vosa -vosas -voso -vosos -vós diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_hi.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_hi.txt deleted file mode 100644 index 86286bb0..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_hi.txt +++ /dev/null @@ -1,235 +0,0 @@ -# Also see http://www.opensource.org/licenses/bsd-license.html -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# This file was created by Jacques Savoy and is distributed under the BSD license. -# Note: by default this file also contains forms normalized by HindiNormalizer -# for spelling variation (see section below), such that it can be used whether or -# not you enable that feature. When adding additional entries to this list, -# please add the normalized form as well. 
-अंदर -अत -अपना -अपनी -अपने -अभी -आदि -आप -इत्यादि -इन -इनका -इन्हीं -इन्हें -इन्हों -इस -इसका -इसकी -इसके -इसमें -इसी -इसे -उन -उनका -उनकी -उनके -उनको -उन्हीं -उन्हें -उन्हों -उस -उसके -उसी -उसे -एक -एवं -एस -ऐसे -और -कई -कर -करता -करते -करना -करने -करें -कहते -कहा -का -काफ़ी -कि -कितना -किन्हें -किन्हों -किया -किर -किस -किसी -किसे -की -कुछ -कुल -के -को -कोई -कौन -कौनसा -गया -घर -जब -जहाँ -जा -जितना -जिन -जिन्हें -जिन्हों -जिस -जिसे -जीधर -जैसा -जैसे -जो -तक -तब -तरह -तिन -तिन्हें -तिन्हों -तिस -तिसे -तो -था -थी -थे -दबारा -दिया -दुसरा -दूसरे -दो -द्वारा -न -नहीं -ना -निहायत -नीचे -ने -पर -पर -पहले -पूरा -पे -फिर -बनी -बही -बहुत -बाद -बाला -बिलकुल -भी -भीतर -मगर -मानो -मे -में -यदि -यह -यहाँ -यही -या -यिह -ये -रखें -रहा -रहे -ऱ्वासा -लिए -लिये -लेकिन -व -वर्ग -वह -वह -वहाँ -वहीं -वाले -वुह -वे -वग़ैरह -संग -सकता -सकते -सबसे -सभी -साथ -साबुत -साभ -सारा -से -सो -ही -हुआ -हुई -हुए -है -हैं -हो -होता -होती -होते -होना -होने -# additional normalized forms of the above -अपनि -जेसे -होति -सभि -तिंहों -इंहों -दवारा -इसि -किंहें -थि -उंहों -ओर -जिंहें -वहिं -अभि -बनि -हि -उंहिं -उंहें -हें -वगेरह -एसे -रवासा -कोन -निचे -काफि -उसि -पुरा -भितर -हे -बहि -वहां -कोइ -यहां -जिंहों -तिंहें -किसि -कइ -यहि -इंहिं -जिधर -इंहें -अदि -इतयादि -हुइ -कोनसा -इसकि -दुसरे -जहां -अप -किंहों -उनकि -भि -वरग -हुअ -जेसा -नहिं diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_hu.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_hu.txt deleted file mode 100644 index 37526da8..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_hu.txt +++ /dev/null @@ -1,211 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt - | This file is distributed under the BSD License. 
- | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| Hungarian stop word list -| prepared by Anna Tordai - -a -ahogy -ahol -aki -akik -akkor -alatt -által -általában -amely -amelyek -amelyekben -amelyeket -amelyet -amelynek -ami -amit -amolyan -amíg -amikor -át -abban -ahhoz -annak -arra -arról -az -azok -azon -azt -azzal -azért -aztán -azután -azonban -bár -be -belül -benne -cikk -cikkek -cikkeket -csak -de -e -eddig -egész -egy -egyes -egyetlen -egyéb -egyik -egyre -ekkor -el -elég -ellen -elő -először -előtt -első -én -éppen -ebben -ehhez -emilyen -ennek -erre -ez -ezt -ezek -ezen -ezzel -ezért -és -fel -felé -hanem -hiszen -hogy -hogyan -igen -így -illetve -ill. -ill -ilyen -ilyenkor -ison -ismét -itt -jó -jól -jobban -kell -kellett -keresztül -keressünk -ki -kívül -között -közül -legalább -lehet -lehetett -legyen -lenne -lenni -lesz -lett -maga -magát -majd -majd -már -más -másik -meg -még -mellett -mert -mely -melyek -mi -mit -míg -miért -milyen -mikor -minden -mindent -mindenki -mindig -mint -mintha -mivel -most -nagy -nagyobb -nagyon -ne -néha -nekem -neki -nem -néhány -nélkül -nincs -olyan -ott -össze -ő -ők -őket -pedig -persze -rá -s -saját -sem -semmi -sok -sokat -sokkal -számára -szemben -szerint -szinte -talán -tehát -teljes -tovább -továbbá -több -úgy -ugyanis -új -újabb -újra -után -utána -utolsó -vagy -vagyis -valaki -valami -valamint -való -vagyok -van -vannak -volt -voltam -voltak -voltunk -vissza -vele -viszont -volna diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_hy.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_hy.txt deleted file mode 100644 index 60c1c50f..00000000 --- 
a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_hy.txt +++ /dev/null @@ -1,46 +0,0 @@ -# example set of Armenian stopwords. -այդ -այլ -այն -այս -դու -դուք -եմ -են -ենք -ես -եք -է -էի -էին -էինք -էիր -էիք -էր -ըստ -թ -ի -ին -իսկ -իր -կամ -համար -հետ -հետո -մենք -մեջ -մի -ն -նա -նաև -նրա -նրանք -որ -որը -որոնք -որպես -ու -ում -պիտի -վրա -և diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_id.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_id.txt deleted file mode 100644 index 4617f83a..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_id.txt +++ /dev/null @@ -1,359 +0,0 @@ -# from appendix D of: A Study of Stemming Effects on Information -# Retrieval in Bahasa Indonesia -ada -adanya -adalah -adapun -agak -agaknya -agar -akan -akankah -akhirnya -aku -akulah -amat -amatlah -anda -andalah -antar -diantaranya -antara -antaranya -diantara -apa -apaan -mengapa -apabila -apakah -apalagi -apatah -atau -ataukah -ataupun -bagai -bagaikan -sebagai -sebagainya -bagaimana -bagaimanapun -sebagaimana -bagaimanakah -bagi -bahkan -bahwa -bahwasanya -sebaliknya -banyak -sebanyak -beberapa -seberapa -begini -beginian -beginikah -beginilah -sebegini -begitu -begitukah -begitulah -begitupun -sebegitu -belum -belumlah -sebelum -sebelumnya -sebenarnya -berapa -berapakah -berapalah -berapapun -betulkah -sebetulnya -biasa -biasanya -bila -bilakah -bisa -bisakah -sebisanya -boleh -bolehkah -bolehlah -buat -bukan -bukankah -bukanlah -bukannya -cuma -percuma -dahulu -dalam -dan -dapat -dari -daripada -dekat -demi -demikian -demikianlah -sedemikian -dengan -depan -di -dia -dialah -dini -diri -dirinya -terdiri -dong -dulu -enggak -enggaknya -entah -entahlah -terhadap -terhadapnya -hal -hampir -hanya -hanyalah -harus -haruslah -harusnya -seharusnya -hendak -hendaklah -hendaknya 
-hingga -sehingga -ia -ialah -ibarat -ingin -inginkah -inginkan -ini -inikah -inilah -itu -itukah -itulah -jangan -jangankan -janganlah -jika -jikalau -juga -justru -kala -kalau -kalaulah -kalaupun -kalian -kami -kamilah -kamu -kamulah -kan -kapan -kapankah -kapanpun -dikarenakan -karena -karenanya -ke -kecil -kemudian -kenapa -kepada -kepadanya -ketika -seketika -khususnya -kini -kinilah -kiranya -sekiranya -kita -kitalah -kok -lagi -lagian -selagi -lah -lain -lainnya -melainkan -selaku -lalu -melalui -terlalu -lama -lamanya -selama -selama -selamanya -lebih -terlebih -bermacam -macam -semacam -maka -makanya -makin -malah -malahan -mampu -mampukah -mana -manakala -manalagi -masih -masihkah -semasih -masing -mau -maupun -semaunya -memang -mereka -merekalah -meski -meskipun -semula -mungkin -mungkinkah -nah -namun -nanti -nantinya -nyaris -oleh -olehnya -seorang -seseorang -pada -padanya -padahal -paling -sepanjang -pantas -sepantasnya -sepantasnyalah -para -pasti -pastilah -per -pernah -pula -pun -merupakan -rupanya -serupa -saat -saatnya -sesaat -saja -sajalah -saling -bersama -sama -sesama -sambil -sampai -sana -sangat -sangatlah -saya -sayalah -se -sebab -sebabnya -sebuah -tersebut -tersebutlah -sedang -sedangkan -sedikit -sedikitnya -segala -segalanya -segera -sesegera -sejak -sejenak -sekali -sekalian -sekalipun -sesekali -sekaligus -sekarang -sekarang -sekitar -sekitarnya -sela -selain -selalu -seluruh -seluruhnya -semakin -sementara -sempat -semua -semuanya -sendiri -sendirinya -seolah -seperti -sepertinya -sering -seringnya -serta -siapa -siapakah -siapapun -disini -disinilah -sini -sinilah -sesuatu -sesuatunya -suatu -sesudah -sesudahnya -sudah -sudahkah -sudahlah -supaya -tadi -tadinya -tak -tanpa -setelah -telah -tentang -tentu -tentulah -tentunya -tertentu -seterusnya -tapi -tetapi -setiap -tiap -setidaknya -tidak -tidakkah -tidaklah -toh -waduh -wah -wahai -sewaktu -walau -walaupun -wong -yaitu -yakni -yang diff --git 
a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_it.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_it.txt deleted file mode 100644 index 1219cc77..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_it.txt +++ /dev/null @@ -1,303 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | An Italian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - -ad | a (to) before vowel -al | a + il -allo | a + lo -ai | a + i -agli | a + gli -all | a + l' -agl | a + gl' -alla | a + la -alle | a + le -con | with -col | con + il -coi | con + i (forms collo, cogli etc are now very rare) -da | from -dal | da + il -dallo | da + lo -dai | da + i -dagli | da + gli -dall | da + l' -dagl | da + gll' -dalla | da + la -dalle | da + le -di | of -del | di + il -dello | di + lo -dei | di + i -degli | di + gli -dell | di + l' -degl | di + gl' -della | di + la -delle | di + le -in | in -nel | in + el -nello | in + lo -nei | in + i -negli | in + gli -nell | in + l' -negl | in + gl' -nella | in + la -nelle | in + le -su | on -sul | su + il -sullo | su + lo -sui | su + i -sugli | su + gli -sull | su + l' -sugl | su + gl' -sulla | su + la -sulle | su + le -per | through, by -tra | among -contro | against -io | I -tu | thou -lui | he -lei | she -noi | we -voi | you -loro | they -mio | my -mia | -miei | -mie | -tuo | -tua | -tuoi | thy -tue | -suo | -sua | -suoi | his, her -sue | -nostro | our -nostra | -nostri | -nostre | -vostro | your 
-vostra | -vostri | -vostre | -mi | me -ti | thee -ci | us, there -vi | you, there -lo | him, the -la | her, the -li | them -le | them, the -gli | to him, the -ne | from there etc -il | the -un | a -uno | a -una | a -ma | but -ed | and -se | if -perché | why, because -anche | also -come | how -dov | where (as dov') -dove | where -che | who, that -chi | who -cui | whom -non | not -più | more -quale | who, that -quanto | how much -quanti | -quanta | -quante | -quello | that -quelli | -quella | -quelle | -questo | this -questi | -questa | -queste | -si | yes -tutto | all -tutti | all - - | single letter forms: - -a | at -c | as c' for ce or ci -e | and -i | the -l | as l' -o | or - - | forms of avere, to have (not including the infinitive): - -ho -hai -ha -abbiamo -avete -hanno -abbia -abbiate -abbiano -avrò -avrai -avrà -avremo -avrete -avranno -avrei -avresti -avrebbe -avremmo -avreste -avrebbero -avevo -avevi -aveva -avevamo -avevate -avevano -ebbi -avesti -ebbe -avemmo -aveste -ebbero -avessi -avesse -avessimo -avessero -avendo -avuto -avuta -avuti -avute - - | forms of essere, to be (not including the infinitive): -sono -sei -è -siamo -siete -sia -siate -siano -sarò -sarai -sarà -saremo -sarete -saranno -sarei -saresti -sarebbe -saremmo -sareste -sarebbero -ero -eri -era -eravamo -eravate -erano -fui -fosti -fu -fummo -foste -furono -fossi -fosse -fossimo -fossero -essendo - - | forms of fare, to do (not including the infinitive, fa, fat-): -faccio -fai -facciamo -fanno -faccia -facciate -facciano -farò -farai -farà -faremo -farete -faranno -farei -faresti -farebbe -faremmo -fareste -farebbero -facevo -facevi -faceva -facevamo -facevate -facevano -feci -facesti -fece -facemmo -faceste -fecero -facessi -facesse -facessimo -facessero -facendo - - | forms of stare, to be (not including the infinitive): -sto -stai -sta -stiamo -stanno -stia -stiate -stiano -starò -starai -starà -staremo -starete -staranno -starei -staresti -starebbe -staremmo -stareste -starebbero 
-stavo -stavi -stava -stavamo -stavate -stavano -stetti -stesti -stette -stemmo -steste -stettero -stessi -stesse -stessimo -stessero -stando diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ja.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ja.txt deleted file mode 100644 index d4321be6..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ja.txt +++ /dev/null @@ -1,127 +0,0 @@ -# -# This file defines a stopword set for Japanese. -# -# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. -# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 -# for frequency lists, etc. that can be useful for making your own set (if desired) -# -# Note that there is an overlap between these stopwords and the terms stopped when used -# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note -# that comments are not allowed on the same line as stopwords. -# -# Also note that stopping is done in a case-insensitive manner. Change your StopFilter -# configuration if you need case-sensitive stopping. Lastly, note that stopping is done -# using the same character width as the entries in this file. Since this StopFilter is -# normally done after a CJKWidthFilter in your chain, you would usually want your romaji -# entries to be in half-width and your kana entries to be in full-width. 
-# -の -に -は -を -た -が -で -て -と -し -れ -さ -ある -いる -も -する -から -な -こと -として -い -や -れる -など -なっ -ない -この -ため -その -あっ -よう -また -もの -という -あり -まで -られ -なる -へ -か -だ -これ -によって -により -おり -より -による -ず -なり -られる -において -ば -なかっ -なく -しかし -について -せ -だっ -その後 -できる -それ -う -ので -なお -のみ -でき -き -つ -における -および -いう -さらに -でも -ら -たり -その他 -に関する -たち -ます -ん -なら -に対して -特に -せる -及び -これら -とき -では -にて -ほか -ながら -うち -そして -とともに -ただし -かつて -それぞれ -または -お -ほど -ものの -に対する -ほとんど -と共に -といった -です -とも -ところ -ここ -##### End of file diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_lv.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_lv.txt deleted file mode 100644 index e21a23c0..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_lv.txt +++ /dev/null @@ -1,172 +0,0 @@ -# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins -# the original list of over 800 forms was refined: -# pronouns, adverbs, interjections were removed -# -# prepositions -aiz -ap -ar -apakš -ārpus -augšpus -bez -caur -dēļ -gar -iekš -iz -kopš -labad -lejpus -līdz -no -otrpus -pa -par -pār -pēc -pie -pirms -pret -priekš -starp -šaipus -uz -viņpus -virs -virspus -zem -apakšpus -# Conjunctions -un -bet -jo -ja -ka -lai -tomēr -tikko -turpretī -arī -kaut -gan -tādēļ -tā -ne -tikvien -vien -kā -ir -te -vai -kamēr -# Particles -ar -diezin -droši -diemžēl -nebūt -ik -it -taču -nu -pat -tiklab -iekšpus -nedz -tik -nevis -turpretim -jeb -iekam -iekām -iekāms -kolīdz -līdzko -tiklīdz -jebšu -tālab -tāpēc -nekā -itin -jā -jau -jel -nē -nezin -tad -tikai -vis -tak -iekams -vien -# modal verbs -būt -biju -biji -bija -bijām -bijāt -esmu -esi -esam -esat -būšu -būsi -būs -būsim -būsiet -tikt -tiku -tiki -tika -tikām -tikāt -tieku -tiec -tiek -tiekam -tiekat -tikšu -tiks -tiksim -tiksiet -tapt -tapi -tapāt -topat -tapšu -tapsi -taps -tapsim -tapsiet -kļūt -kļuvu -kļuvi -kļuva 
-kļuvām -kļuvāt -kļūstu -kļūsti -kļūst -kļūstam -kļūstat -kļūšu -kļūsi -kļūs -kļūsim -kļūsiet -# verbs -varēt -varēju -varējām -varēšu -varēsim -var -varēji -varējāt -varēsi -varēsiet -varat -varēja -varēs diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_nl.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_nl.txt deleted file mode 100644 index 47a2aeac..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_nl.txt +++ /dev/null @@ -1,119 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Dutch stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large sample of Dutch text. - - | Dutch stop words frequently exhibit homonym clashes. These are indicated - | clearly below. - -de | the -en | and -van | of, from -ik | I, the ego -te | (1) chez, at etc, (2) to, (3) too -dat | that, which -die | that, those, who, which -in | in, inside -een | a, an, one -hij | he -het | the, it -niet | not, nothing, naught -zijn | (1) to be, being, (2) his, one's, its -is | is -was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river -op | on, upon, at, in, up, used up -aan | on, upon, to (as dative) -met | with, by -als | like, such as, when -voor | (1) before, in front of, (2) furrow -had | had, past tense all persons sing. 
of 'hebben' (have) -er | there -maar | but, only -om | round, about, for etc -hem | him -dan | then -zou | should/would, past tense all persons sing. of 'zullen' -of | or, whether, if -wat | what, something, anything -mijn | possessive and noun 'mine' -men | people, 'one' -dit | this -zo | so, thus, in this way -door | through by -over | over, across -ze | she, her, they, them -zich | oneself -bij | (1) a bee, (2) by, near, at -ook | also, too -tot | till, until -je | you -mij | me -uit | out of, from -der | Old Dutch form of 'van der' still found in surnames -daar | (1) there, (2) because -haar | (1) her, their, them, (2) hair -naar | (1) unpleasant, unwell etc, (2) towards, (3) as -heb | present first person sing. of 'to have' -hoe | how, why -heeft | present third person sing. of 'to have' -hebben | 'to have' and various parts thereof -deze | this -u | you -want | (1) for, (2) mitten, (3) rigging -nog | yet, still -zal | 'shall', first and third person sing. of verb 'zullen' (will) -me | me -zij | she, they -nu | now -ge | 'thou', still used in Belgium and south Netherlands -geen | none -omdat | because -iets | something, somewhat -worden | to become, grow, get -toch | yet, still -al | all, every, each -waren | (1) 'were' (2) to wander, (3) wares, (3) -veel | much, many -meer | (1) more, (2) lake -doen | to do, to make -toen | then, when -moet | noun 'spot/mote' and present form of 'to must' -ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' -zonder | without -kan | noun 'can' and present form of 'to be able' -hun | their, them -dus | so, consequently -alles | all, everything, anything -onder | under, beneath -ja | yes, of course -eens | once, one day -hier | here -wie | who -werd | imperfect third person sing. of 'become' -altijd | always -doch | yet, but etc -wordt | present third person sing. 
of 'become' -wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans -kunnen | to be able -ons | us/our -zelf | self -tegen | against, towards, at -na | after, near -reeds | already -wil | (1) present tense of 'want', (2) 'will', noun, (3) fender -kon | could; past tense of 'to be able' -niets | nothing -uw | your -iemand | somebody -geweest | been; past participle of 'be' -andere | other diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_no.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_no.txt deleted file mode 100644 index a7a2c28b..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_no.txt +++ /dev/null @@ -1,194 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Norwegian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This stop word list is for the dominant bokmål dialect. Words unique - | to nynorsk are marked *. - - | Revised by Jan Bruusgaard , Jan 2005 - -og | and -i | in -jeg | I -det | it/this/that -at | to (w. inf.) 
-en | a/an -et | a/an -den | it/this/that -til | to -er | is/am/are -som | who/that -på | on -de | they / you(formal) -med | with -han | he -av | of -ikke | not -ikkje | not * -der | there -så | so -var | was/were -meg | me -seg | you -men | but -ett | one -har | have -om | about -vi | we -min | my -mitt | my -ha | have -hadde | had -hun | she -nå | now -over | over -da | when/as -ved | by/know -fra | from -du | you -ut | out -sin | your -dem | them -oss | us -opp | up -man | you/one -kan | can -hans | his -hvor | where -eller | or -hva | what -skal | shall/must -selv | self (reflective) -sjøl | self (reflective) -her | here -alle | all -vil | will -bli | become -ble | became -blei | became * -blitt | have become -kunne | could -inn | in -når | when -være | be -kom | come -noen | some -noe | some -ville | would -dere | you -som | who/which/that -deres | their/theirs -kun | only/just -ja | yes -etter | after -ned | down -skulle | should -denne | this -for | for/because -deg | you -si | hers/his -sine | hers/his -sitt | hers/his -mot | against -å | to -meget | much -hvorfor | why -dette | this -disse | these/those -uten | without -hvordan | how -ingen | none -din | your -ditt | your -blir | become -samme | same -hvilken | which -hvilke | which (plural) -sånn | such a -inni | inside/within -mellom | between -vår | our -hver | each -hvem | who -vors | us/ours -hvis | whose -både | both -bare | only/just -enn | than -fordi | as/because -før | before -mange | many -også | also -slik | just -vært | been -være | to be -båe | both * -begge | both -siden | since -dykk | your * -dykkar | yours * -dei | they * -deira | them * -deires | theirs * -deim | them * -di | your (fem.) 
* -då | as/when * -eg | I * -ein | a/an * -eit | a/an * -eitt | a/an * -elles | or * -honom | he * -hjå | at * -ho | she * -hoe | she * -henne | her -hennar | her/hers -hennes | hers -hoss | how * -hossen | how * -ikkje | not * -ingi | noone * -inkje | noone * -korleis | how * -korso | how * -kva | what/which * -kvar | where * -kvarhelst | where * -kven | who/whom * -kvi | why * -kvifor | why * -me | we * -medan | while * -mi | my * -mine | my * -mykje | much * -no | now * -nokon | some (masc./neut.) * -noka | some (fem.) * -nokor | some * -noko | some * -nokre | some * -si | his/hers * -sia | since * -sidan | since * -so | so * -somt | some * -somme | some * -um | about* -upp | up * -vere | be * -vore | was * -verte | become * -vort | become * -varte | became * -vart | became * - diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_pt.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_pt.txt deleted file mode 100644 index acfeb01a..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_pt.txt +++ /dev/null @@ -1,253 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Portuguese stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. 
- -de | of, from -a | the; to, at; her -o | the; him -que | who, that -e | and -do | de + o -da | de + a -em | in -um | a -para | for - | é from SER -com | with -não | not, no -uma | a -os | the; them -no | em + o -se | himself etc -na | em + a -por | for -mais | more -as | the; them -dos | de + os -como | as, like -mas | but - | foi from SER -ao | a + o -ele | he -das | de + as - | tem from TER -à | a + a -seu | his -sua | her -ou | or - | ser from SER -quando | when -muito | much - | há from HAV -nos | em + os; us -já | already, now - | está from EST -eu | I -também | also -só | only, just -pelo | per + o -pela | per + a -até | up to -isso | that -ela | he -entre | between - | era from SER -depois | after -sem | without -mesmo | same -aos | a + os - | ter from TER -seus | his -quem | whom -nas | em + as -me | me -esse | that -eles | they - | estão from EST -você | you - | tinha from TER - | foram from SER -essa | that -num | em + um -nem | nor -suas | her -meu | my -às | a + as -minha | my - | têm from TER -numa | em + uma -pelos | per + os -elas | they - | havia from HAV - | seja from SER -qual | which - | será from SER -nós | we - | tenho from TER -lhe | to him, her -deles | of them -essas | those -esses | those -pelas | per + as -este | this - | fosse from SER -dele | of him - - | other words. There are many contractions such as naquele = em+aquele, - | mo = me+o, but they are rare. - | Indefinite article plural forms are also rare. 
- -tu | thou -te | thee -vocês | you (plural) -vos | you -lhes | to them -meus | my -minhas -teu | thy -tua -teus -tuas -nosso | our -nossa -nossos -nossas - -dela | of her -delas | of them - -esta | this -estes | these -estas | these -aquele | that -aquela | that -aqueles | those -aquelas | those -isto | this -aquilo | that - - | forms of estar, to be (not including the infinitive): -estou -está -estamos -estão -estive -esteve -estivemos -estiveram -estava -estávamos -estavam -estivera -estivéramos -esteja -estejamos -estejam -estivesse -estivéssemos -estivessem -estiver -estivermos -estiverem - - | forms of haver, to have (not including the infinitive): -hei -há -havemos -hão -houve -houvemos -houveram -houvera -houvéramos -haja -hajamos -hajam -houvesse -houvéssemos -houvessem -houver -houvermos -houverem -houverei -houverá -houveremos -houverão -houveria -houveríamos -houveriam - - | forms of ser, to be (not including the infinitive): -sou -somos -são -era -éramos -eram -fui -foi -fomos -foram -fora -fôramos -seja -sejamos -sejam -fosse -fôssemos -fossem -for -formos -forem -serei -será -seremos -serão -seria -seríamos -seriam - - | forms of ter, to have (not including the infinitive): -tenho -tem -temos -tém -tinha -tínhamos -tinham -tive -teve -tivemos -tiveram -tivera -tivéramos -tenha -tenhamos -tenham -tivesse -tivéssemos -tivessem -tiver -tivermos -tiverem -terei -terá -teremos -terão -teria -teríamos -teriam diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ro.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ro.txt deleted file mode 100644 index 4fdee90a..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ro.txt +++ /dev/null @@ -1,233 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. 
-# See http://members.unine.ch/jacques.savoy/clef/index.html. -# Also see http://www.opensource.org/licenses/bsd-license.html -acea -aceasta -această -aceea -acei -aceia -acel -acela -acele -acelea -acest -acesta -aceste -acestea -aceşti -aceştia -acolo -acum -ai -aia -aibă -aici -al -ăla -ale -alea -ălea -altceva -altcineva -am -ar -are -aş -aşadar -asemenea -asta -ăsta -astăzi -astea -ăstea -ăştia -asupra -aţi -au -avea -avem -aveţi -azi -bine -bucur -bună -ca -că -căci -când -care -cărei -căror -cărui -cât -câte -câţi -către -câtva -ce -cel -ceva -chiar -cînd -cine -cineva -cît -cîte -cîţi -cîtva -contra -cu -cum -cumva -curând -curînd -da -dă -dacă -dar -datorită -de -deci -deja -deoarece -departe -deşi -din -dinaintea -dintr -dintre -drept -după -ea -ei -el -ele -eram -este -eşti -eu -face -fără -fi -fie -fiecare -fii -fim -fiţi -iar -ieri -îi -îl -îmi -împotriva -în -înainte -înaintea -încât -încît -încotro -între -întrucât -întrucît -îţi -la -lângă -le -li -lîngă -lor -lui -mă -mâine -mea -mei -mele -mereu -meu -mi -mine -mult -multă -mulţi -ne -nicăieri -nici -nimeni -nişte -noastră -noastre -noi -noştri -nostru -nu -ori -oricând -oricare -oricât -orice -oricînd -oricine -oricît -oricum -oriunde -până -pe -pentru -peste -pînă -poate -pot -prea -prima -primul -prin -printr -sa -să -săi -sale -sau -său -se -şi -sînt -sîntem -sînteţi -spre -sub -sunt -suntem -sunteţi -ta -tăi -tale -tău -te -ţi -ţie -tine -toată -toate -tot -toţi -totuşi -tu -un -una -unde -undeva -unei -unele -uneori -unor -vă -vi -voastră -voastre -voi -voştri -vostru -vouă -vreo -vreun diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ru.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ru.txt deleted file mode 100644 index 55271400..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_ru.txt +++ /dev/null @@ -1,243 +0,0 @@ 
- | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | a russian stop word list. comments begin with vertical bar. each stop - | word is at the start of a line. - - | this is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | letter `ё' is translated to `е'. - -и | and -в | in/into -во | alternative form -не | not -что | what/that -он | he -на | on/onto -я | i -с | from -со | alternative form -как | how -а | milder form of `no' (but) -то | conjunction and form of `that' -все | all -она | she -так | so, thus -его | him -но | but -да | yes/and -ты | thou -к | towards, by -у | around, chez -же | intensifier particle -вы | you -за | beyond, behind -бы | conditional/subj. particle -по | up to, along -только | only -ее | her -мне | to me -было | it was -вот | here is/are, particle -от | away from -меня | me -еще | still, yet, more -нет | no, there isnt/arent -о | about -из | out of -ему | to him -теперь | now -когда | when -даже | even -ну | so, well -вдруг | suddenly -ли | interrogative particle -если | if -уже | already, but homonym of `narrower' -или | or -ни | neither -быть | to be -был | he was -него | prepositional form of его -до | up to -вас | you accusative -нибудь | indef. 
suffix preceded by hyphen -опять | again -уж | already, but homonym of `adder' -вам | to you -сказал | he said -ведь | particle `after all' -там | there -потом | then -себя | oneself -ничего | nothing -ей | to her -может | usually with `быть' as `maybe' -они | they -тут | here -где | where -есть | there is/are -надо | got to, must -ней | prepositional form of ей -для | for -мы | we -тебя | thee -их | them, their -чем | than -была | she was -сам | self -чтоб | in order to -без | without -будто | as if -человек | man, person, one -чего | genitive form of `what' -раз | once -тоже | also -себе | to oneself -под | beneath -жизнь | life -будет | will be -ж | short form of intensifer particle `же' -тогда | then -кто | who -этот | this -говорил | was saying -того | genitive form of `that' -потому | for that reason -этого | genitive form of `this' -какой | which -совсем | altogether -ним | prepositional form of `его', `они' -здесь | here -этом | prepositional form of `этот' -один | one -почти | almost -мой | my -тем | instrumental/dative plural of `тот', `то' -чтобы | full form of `in order that' -нее | her (acc.) -кажется | it seems -сейчас | now -были | they were -куда | where to -зачем | why -сказать | to say -всех | all (acc., gen. preposn. plural) -никогда | never -сегодня | today -можно | possible, one can -при | by -наконец | finally -два | two -об | alternative form of `о', about -другой | another -хоть | even -после | after -над | above -больше | more -тот | that one (masc.) -через | across, in -эти | these -нас | us -про | about -всего | in all, only, of all -них | prepositional form of `они' (they) -какая | which, feminine -много | lots -разве | interrogative particle -сказала | she said -три | three -эту | this, acc. fem. sing. -моя | my, feminine -впрочем | moreover, besides -хорошо | good -свою | ones own, acc. fem. sing. -этой | oblique form of `эта', fem. `this' -перед | in front of -иногда | sometimes -лучше | better -чуть | a little -том | preposn. 
form of `that one' -нельзя | one must not -такой | such a one -им | to them -более | more -всегда | always -конечно | of course -всю | acc. fem. sing of `all' -между | between - - - | b: some paradigms - | - | personal pronouns - | - | я меня мне мной [мною] - | ты тебя тебе тобой [тобою] - | он его ему им [него, нему, ним] - | она ее эи ею [нее, нэи, нею] - | оно его ему им [него, нему, ним] - | - | мы нас нам нами - | вы вас вам вами - | они их им ими [них, ним, ними] - | - | себя себе собой [собою] - | - | demonstrative pronouns: этот (this), тот (that) - | - | этот эта это эти - | этого эты это эти - | этого этой этого этих - | этому этой этому этим - | этим этой этим [этою] этими - | этом этой этом этих - | - | тот та то те - | того ту то те - | того той того тех - | тому той тому тем - | тем той тем [тою] теми - | том той том тех - | - | determinative pronouns - | - | (a) весь (all) - | - | весь вся все все - | всего всю все все - | всего всей всего всех - | всему всей всему всем - | всем всей всем [всею] всеми - | всем всей всем всех - | - | (b) сам (himself etc) - | - | сам сама само сами - | самого саму само самих - | самого самой самого самих - | самому самой самому самим - | самим самой самим [самою] самими - | самом самой самом самих - | - | stems of verbs `to be', `to have', `to do' and modal - | - | быть бы буд быв есть суть - | име - | дел - | мог мож мочь - | уме - | хоч хот - | долж - | можн - | нужн - | нельзя - diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_sv.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_sv.txt deleted file mode 100644 index 096f87f6..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_sv.txt +++ /dev/null @@ -1,133 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt - | This file is distributed under the BSD License. 
- | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Swedish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | Swedish stop words occasionally exhibit homonym clashes. For example - | så = so, but also seed. These are indicated clearly below. - -och | and -det | it, this/that -att | to (with infinitive) -i | in, at -en | a -jag | I -hon | she -som | who, that -han | he -på | on -den | it, this/that -med | with -var | where, each -sig | him(self) etc -för | for -så | so (also: seed) -till | to -är | is -men | but -ett | a -om | if; around, about -hade | had -de | they, these/those -av | of -icke | not, no -mig | me -du | you -henne | her -då | then, when -sin | his -nu | now -har | have -inte | inte någon = no one -hans | his -honom | him -skulle | 'sake' -hennes | her -där | there -min | my -man | one (pronoun) -ej | nor -vid | at, by, on (also: vast) -kunde | could -något | some etc -från | from, off -ut | out -när | when -efter | after, behind -upp | up -vi | we -dem | them -vara | be -vad | what -över | over -än | than -dig | you -kan | can -sina | his -här | here -ha | have -mot | towards -alla | all -under | under (also: wonder) -någon | some etc -eller | or (else) -allt | all -mycket | much -sedan | since -ju | why -denna | this/that -själv | myself, yourself etc -detta | this/that -åt | to -utan | without -varit | was -hur | how -ingen | no -mitt | my -ni | you -bli | to be, become -blev | from bli -oss | us -din | thy -dessa | these/those -några | some etc -deras | their -blir | from bli -mina | my -samma | (the) same -vilken | who, that -er | you, your -sådan | such a -vår | our 
-blivit | from bli -dess | its -inom | within -mellan | between -sådant | such a -varför | why -varje | each -vilka | who, that -ditt | thy -vem | who -vilket | who, that -sitta | his -sådana | such a -vart | each -dina | thy -vars | whose -vårt | our -våra | our -ert | your -era | your -vilkas | whose - diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_th.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_th.txt deleted file mode 100644 index 07f0fabe..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_th.txt +++ /dev/null @@ -1,119 +0,0 @@ -# Thai stopwords from: -# "Opinion Detection in Thai Political News Columns -# Based on Subjectivity Analysis" -# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak -ไว้ -ไม่ -ไป -ได้ -ให้ -ใน -โดย -แห่ง -แล้ว -และ -แรก -แบบ -แต่ -เอง -เห็น -เลย -เริ่ม -เรา -เมื่อ -เพื่อ -เพราะ -เป็นการ -เป็น -เปิดเผย -เปิด -เนื่องจาก -เดียวกัน -เดียว -เช่น -เฉพาะ -เคย -เข้า -เขา -อีก -อาจ -อะไร -ออก -อย่าง -อยู่ -อยาก -หาก -หลาย -หลังจาก -หลัง -หรือ -หนึ่ง -ส่วน -ส่ง -สุด -สําหรับ -ว่า -วัน -ลง -ร่วม -ราย -รับ -ระหว่าง -รวม -ยัง -มี -มาก -มา -พร้อม -พบ -ผ่าน -ผล -บาง -น่า -นี้ -นํา -นั้น -นัก -นอกจาก -ทุก -ที่สุด -ที่ -ทําให้ -ทํา -ทาง -ทั้งนี้ -ทั้ง -ถ้า -ถูก -ถึง -ต้อง -ต่างๆ -ต่าง -ต่อ -ตาม -ตั้งแต่ -ตั้ง -ด้าน -ด้วย -ดัง -ซึ่ง -ช่วง -จึง -จาก -จัด -จะ -คือ -ความ -ครั้ง -คง -ขึ้น -ของ -ขอ -ขณะ -ก่อน -ก็ -การ -กับ -กัน -กว่า -กล่าว diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_tr.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_tr.txt deleted file mode 100644 index 84d9408d..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/stopwords_tr.txt +++ /dev/null @@ -1,212 +0,0 @@ -# Turkish stopwords from 
LUCENE-559 -# merged with the list from "Information Retrieval on Turkish Texts" -# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) -acaba -altmış -altı -ama -ancak -arada -aslında -ayrıca -bana -bazı -belki -ben -benden -beni -benim -beri -beş -bile -bin -bir -birçok -biri -birkaç -birkez -birşey -birşeyi -biz -bize -bizden -bizi -bizim -böyle -böylece -bu -buna -bunda -bundan -bunlar -bunları -bunların -bunu -bunun -burada -çok -çünkü -da -daha -dahi -de -defa -değil -diğer -diye -doksan -dokuz -dolayı -dolayısıyla -dört -edecek -eden -ederek -edilecek -ediliyor -edilmesi -ediyor -eğer -elli -en -etmesi -etti -ettiği -ettiğini -gibi -göre -halen -hangi -hatta -hem -henüz -hep -hepsi -her -herhangi -herkesin -hiç -hiçbir -için -iki -ile -ilgili -ise -işte -itibaren -itibariyle -kadar -karşın -katrilyon -kendi -kendilerine -kendini -kendisi -kendisine -kendisini -kez -ki -kim -kimden -kime -kimi -kimse -kırk -milyar -milyon -mu -mü -mı -nasıl -ne -neden -nedenle -nerde -nerede -nereye -niye -niçin -o -olan -olarak -oldu -olduğu -olduğunu -olduklarını -olmadı -olmadığı -olmak -olması -olmayan -olmaz -olsa -olsun -olup -olur -olursa -oluyor -on -ona -ondan -onlar -onlardan -onları -onların -onu -onun -otuz -oysa -öyle -pek -rağmen -sadece -sanki -sekiz -seksen -sen -senden -seni -senin -siz -sizden -sizi -sizin -şey -şeyden -şeyi -şeyler -şöyle -şu -şuna -şunda -şundan -şunları -şunu -tarafından -trilyon -tüm -üç -üzere -var -vardı -ve -veya -ya -yani -yapacak -yapılan -yapılması -yapıyor -yapmak -yaptı -yaptığı -yaptığını -yaptıkları -yedi -yerine -yetmiş -yine -yirmi -yoksa -yüz -zaten diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/userdict_ja.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/userdict_ja.txt deleted file mode 100644 index 6f0368e4..00000000 --- 
a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/lang/userdict_ja.txt +++ /dev/null @@ -1,29 +0,0 @@ -# -# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) -# -# Add entries to this file in order to override the statistical model in terms -# of segmentation, readings and part-of-speech tags. Notice that entries do -# not have weights since they are always used when found. This is by-design -# in order to maximize ease-of-use. -# -# Entries are defined using the following CSV format: -# , ... , ... , -# -# Notice that a single half-width space separates tokens and readings, and -# that the number tokens and readings must match exactly. -# -# Also notice that multiple entries with the same is undefined. -# -# Whitespace only lines are ignored. Comments are not allowed on entry lines. -# - -# Custom segmentation for kanji compounds -日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 -関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 - -# Custom segmentation for compound katakana -トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 -ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 - -# Custom reading for former sumo wrestler -朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema deleted file mode 100644 index aad0c822..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema +++ /dev/null @@ -1,1202 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - id - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/mapping-FoldToASCII.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/mapping-FoldToASCII.txt deleted file mode 100644 index 9a84b6ea..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/mapping-FoldToASCII.txt +++ /dev/null @@ -1,3813 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# This map converts alphabetic, numeric, and symbolic Unicode characters -# which are not in the first 127 ASCII characters (the "Basic Latin" Unicode -# block) into their ASCII equivalents, if one exists. -# -# Characters from the following Unicode blocks are converted; however, only -# those characters with reasonable ASCII alternatives are converted: -# -# - C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf -# - Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf -# - Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf -# - Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf -# - Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf -# - Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf -# - IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf -# - Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf -# - Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf -# - General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf -# - Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf -# - Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf -# - Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf -# - Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf -# - Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf -# - Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf -# -# See: 
http://en.wikipedia.org/wiki/Latin_characters_in_Unicode -# -# The set of character conversions supported by this map is a superset of -# those supported by the map represented by mapping-ISOLatin1Accent.txt. -# -# See the bottom of this file for the Perl script used to generate the contents -# of this file (without this header) from ASCIIFoldingFilter.java. - - -# Syntax: -# "source" => "target" -# "source".length() > 0 (source cannot be empty.) -# "target".length() >= 0 (target can be empty.) - - -# À [LATIN CAPITAL LETTER A WITH GRAVE] -"\u00C0" => "A" - -# Á [LATIN CAPITAL LETTER A WITH ACUTE] -"\u00C1" => "A" - -#  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] -"\u00C2" => "A" - -# à [LATIN CAPITAL LETTER A WITH TILDE] -"\u00C3" => "A" - -# Ä [LATIN CAPITAL LETTER A WITH DIAERESIS] -"\u00C4" => "A" - -# Å [LATIN CAPITAL LETTER A WITH RING ABOVE] -"\u00C5" => "A" - -# Ā [LATIN CAPITAL LETTER A WITH MACRON] -"\u0100" => "A" - -# Ă [LATIN CAPITAL LETTER A WITH BREVE] -"\u0102" => "A" - -# Ą [LATIN CAPITAL LETTER A WITH OGONEK] -"\u0104" => "A" - -# Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA] -"\u018F" => "A" - -# Ǎ [LATIN CAPITAL LETTER A WITH CARON] -"\u01CD" => "A" - -# Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] -"\u01DE" => "A" - -# Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] -"\u01E0" => "A" - -# Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] -"\u01FA" => "A" - -# Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] -"\u0200" => "A" - -# Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE] -"\u0202" => "A" - -# Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE] -"\u0226" => "A" - -# Ⱥ [LATIN CAPITAL LETTER A WITH STROKE] -"\u023A" => "A" - -# ᴀ [LATIN LETTER SMALL CAPITAL A] -"\u1D00" => "A" - -# Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW] -"\u1E00" => "A" - -# Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW] -"\u1EA0" => "A" - -# Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE] -"\u1EA2" => "A" - -# Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] 
-"\u1EA4" => "A" - -# Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] -"\u1EA6" => "A" - -# Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EA8" => "A" - -# Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] -"\u1EAA" => "A" - -# Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] -"\u1EAC" => "A" - -# Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] -"\u1EAE" => "A" - -# Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] -"\u1EB0" => "A" - -# Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE] -"\u1EB2" => "A" - -# Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] -"\u1EB4" => "A" - -# Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] -"\u1EB6" => "A" - -# Ⓐ [CIRCLED LATIN CAPITAL LETTER A] -"\u24B6" => "A" - -# A [FULLWIDTH LATIN CAPITAL LETTER A] -"\uFF21" => "A" - -# à [LATIN SMALL LETTER A WITH GRAVE] -"\u00E0" => "a" - -# á [LATIN SMALL LETTER A WITH ACUTE] -"\u00E1" => "a" - -# â [LATIN SMALL LETTER A WITH CIRCUMFLEX] -"\u00E2" => "a" - -# ã [LATIN SMALL LETTER A WITH TILDE] -"\u00E3" => "a" - -# ä [LATIN SMALL LETTER A WITH DIAERESIS] -"\u00E4" => "a" - -# å [LATIN SMALL LETTER A WITH RING ABOVE] -"\u00E5" => "a" - -# ā [LATIN SMALL LETTER A WITH MACRON] -"\u0101" => "a" - -# ă [LATIN SMALL LETTER A WITH BREVE] -"\u0103" => "a" - -# ą [LATIN SMALL LETTER A WITH OGONEK] -"\u0105" => "a" - -# ǎ [LATIN SMALL LETTER A WITH CARON] -"\u01CE" => "a" - -# ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] -"\u01DF" => "a" - -# ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] -"\u01E1" => "a" - -# ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] -"\u01FB" => "a" - -# ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE] -"\u0201" => "a" - -# ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE] -"\u0203" => "a" - -# ȧ [LATIN SMALL LETTER A WITH DOT ABOVE] -"\u0227" => "a" - -# ɐ [LATIN SMALL LETTER TURNED A] -"\u0250" => "a" - -# ə [LATIN SMALL LETTER SCHWA] -"\u0259" => "a" - -# ɚ [LATIN SMALL LETTER SCHWA WITH HOOK] -"\u025A" => "a" - -# ᶏ [LATIN SMALL 
LETTER A WITH RETROFLEX HOOK] -"\u1D8F" => "a" - -# ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] -"\u1D95" => "a" - -# ạ [LATIN SMALL LETTER A WITH RING BELOW] -"\u1E01" => "a" - -# ả [LATIN SMALL LETTER A WITH RIGHT HALF RING] -"\u1E9A" => "a" - -# ạ [LATIN SMALL LETTER A WITH DOT BELOW] -"\u1EA1" => "a" - -# ả [LATIN SMALL LETTER A WITH HOOK ABOVE] -"\u1EA3" => "a" - -# ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] -"\u1EA5" => "a" - -# ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] -"\u1EA7" => "a" - -# ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EA9" => "a" - -# ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] -"\u1EAB" => "a" - -# ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] -"\u1EAD" => "a" - -# ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE] -"\u1EAF" => "a" - -# ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE] -"\u1EB1" => "a" - -# ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] -"\u1EB3" => "a" - -# ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE] -"\u1EB5" => "a" - -# ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] -"\u1EB7" => "a" - -# ₐ [LATIN SUBSCRIPT SMALL LETTER A] -"\u2090" => "a" - -# ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA] -"\u2094" => "a" - -# ⓐ [CIRCLED LATIN SMALL LETTER A] -"\u24D0" => "a" - -# ⱥ [LATIN SMALL LETTER A WITH STROKE] -"\u2C65" => "a" - -# Ɐ [LATIN CAPITAL LETTER TURNED A] -"\u2C6F" => "a" - -# a [FULLWIDTH LATIN SMALL LETTER A] -"\uFF41" => "a" - -# Ꜳ [LATIN CAPITAL LETTER AA] -"\uA732" => "AA" - -# Æ [LATIN CAPITAL LETTER AE] -"\u00C6" => "AE" - -# Ǣ [LATIN CAPITAL LETTER AE WITH MACRON] -"\u01E2" => "AE" - -# Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE] -"\u01FC" => "AE" - -# ᴁ [LATIN LETTER SMALL CAPITAL AE] -"\u1D01" => "AE" - -# Ꜵ [LATIN CAPITAL LETTER AO] -"\uA734" => "AO" - -# Ꜷ [LATIN CAPITAL LETTER AU] -"\uA736" => "AU" - -# Ꜹ [LATIN CAPITAL LETTER AV] -"\uA738" => "AV" - -# Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] -"\uA73A" => "AV" - -# Ꜽ [LATIN CAPITAL LETTER AY] -"\uA73C" => 
"AY" - -# ⒜ [PARENTHESIZED LATIN SMALL LETTER A] -"\u249C" => "(a)" - -# ꜳ [LATIN SMALL LETTER AA] -"\uA733" => "aa" - -# æ [LATIN SMALL LETTER AE] -"\u00E6" => "ae" - -# ǣ [LATIN SMALL LETTER AE WITH MACRON] -"\u01E3" => "ae" - -# ǽ [LATIN SMALL LETTER AE WITH ACUTE] -"\u01FD" => "ae" - -# ᴂ [LATIN SMALL LETTER TURNED AE] -"\u1D02" => "ae" - -# ꜵ [LATIN SMALL LETTER AO] -"\uA735" => "ao" - -# ꜷ [LATIN SMALL LETTER AU] -"\uA737" => "au" - -# ꜹ [LATIN SMALL LETTER AV] -"\uA739" => "av" - -# ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR] -"\uA73B" => "av" - -# ꜽ [LATIN SMALL LETTER AY] -"\uA73D" => "ay" - -# Ɓ [LATIN CAPITAL LETTER B WITH HOOK] -"\u0181" => "B" - -# Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR] -"\u0182" => "B" - -# Ƀ [LATIN CAPITAL LETTER B WITH STROKE] -"\u0243" => "B" - -# ʙ [LATIN LETTER SMALL CAPITAL B] -"\u0299" => "B" - -# ᴃ [LATIN LETTER SMALL CAPITAL BARRED B] -"\u1D03" => "B" - -# Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE] -"\u1E02" => "B" - -# Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW] -"\u1E04" => "B" - -# Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW] -"\u1E06" => "B" - -# Ⓑ [CIRCLED LATIN CAPITAL LETTER B] -"\u24B7" => "B" - -# B [FULLWIDTH LATIN CAPITAL LETTER B] -"\uFF22" => "B" - -# ƀ [LATIN SMALL LETTER B WITH STROKE] -"\u0180" => "b" - -# ƃ [LATIN SMALL LETTER B WITH TOPBAR] -"\u0183" => "b" - -# ɓ [LATIN SMALL LETTER B WITH HOOK] -"\u0253" => "b" - -# ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE] -"\u1D6C" => "b" - -# ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK] -"\u1D80" => "b" - -# ḃ [LATIN SMALL LETTER B WITH DOT ABOVE] -"\u1E03" => "b" - -# ḅ [LATIN SMALL LETTER B WITH DOT BELOW] -"\u1E05" => "b" - -# ḇ [LATIN SMALL LETTER B WITH LINE BELOW] -"\u1E07" => "b" - -# ⓑ [CIRCLED LATIN SMALL LETTER B] -"\u24D1" => "b" - -# b [FULLWIDTH LATIN SMALL LETTER B] -"\uFF42" => "b" - -# ⒝ [PARENTHESIZED LATIN SMALL LETTER B] -"\u249D" => "(b)" - -# Ç [LATIN CAPITAL LETTER C WITH CEDILLA] -"\u00C7" => "C" - -# Ć [LATIN CAPITAL LETTER C WITH ACUTE] -"\u0106" => 
"C" - -# Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] -"\u0108" => "C" - -# Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE] -"\u010A" => "C" - -# Č [LATIN CAPITAL LETTER C WITH CARON] -"\u010C" => "C" - -# Ƈ [LATIN CAPITAL LETTER C WITH HOOK] -"\u0187" => "C" - -# Ȼ [LATIN CAPITAL LETTER C WITH STROKE] -"\u023B" => "C" - -# ʗ [LATIN LETTER STRETCHED C] -"\u0297" => "C" - -# ᴄ [LATIN LETTER SMALL CAPITAL C] -"\u1D04" => "C" - -# Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] -"\u1E08" => "C" - -# Ⓒ [CIRCLED LATIN CAPITAL LETTER C] -"\u24B8" => "C" - -# C [FULLWIDTH LATIN CAPITAL LETTER C] -"\uFF23" => "C" - -# ç [LATIN SMALL LETTER C WITH CEDILLA] -"\u00E7" => "c" - -# ć [LATIN SMALL LETTER C WITH ACUTE] -"\u0107" => "c" - -# ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX] -"\u0109" => "c" - -# ċ [LATIN SMALL LETTER C WITH DOT ABOVE] -"\u010B" => "c" - -# č [LATIN SMALL LETTER C WITH CARON] -"\u010D" => "c" - -# ƈ [LATIN SMALL LETTER C WITH HOOK] -"\u0188" => "c" - -# ȼ [LATIN SMALL LETTER C WITH STROKE] -"\u023C" => "c" - -# ɕ [LATIN SMALL LETTER C WITH CURL] -"\u0255" => "c" - -# ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] -"\u1E09" => "c" - -# ↄ [LATIN SMALL LETTER REVERSED C] -"\u2184" => "c" - -# ⓒ [CIRCLED LATIN SMALL LETTER C] -"\u24D2" => "c" - -# Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT] -"\uA73E" => "c" - -# ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT] -"\uA73F" => "c" - -# c [FULLWIDTH LATIN SMALL LETTER C] -"\uFF43" => "c" - -# ⒞ [PARENTHESIZED LATIN SMALL LETTER C] -"\u249E" => "(c)" - -# Ð [LATIN CAPITAL LETTER ETH] -"\u00D0" => "D" - -# Ď [LATIN CAPITAL LETTER D WITH CARON] -"\u010E" => "D" - -# Đ [LATIN CAPITAL LETTER D WITH STROKE] -"\u0110" => "D" - -# Ɖ [LATIN CAPITAL LETTER AFRICAN D] -"\u0189" => "D" - -# Ɗ [LATIN CAPITAL LETTER D WITH HOOK] -"\u018A" => "D" - -# Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR] -"\u018B" => "D" - -# ᴅ [LATIN LETTER SMALL CAPITAL D] -"\u1D05" => "D" - -# ᴆ [LATIN LETTER SMALL CAPITAL ETH] -"\u1D06" => "D" - -# Ḋ [LATIN CAPITAL 
LETTER D WITH DOT ABOVE] -"\u1E0A" => "D" - -# Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW] -"\u1E0C" => "D" - -# Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW] -"\u1E0E" => "D" - -# Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA] -"\u1E10" => "D" - -# Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] -"\u1E12" => "D" - -# Ⓓ [CIRCLED LATIN CAPITAL LETTER D] -"\u24B9" => "D" - -# Ꝺ [LATIN CAPITAL LETTER INSULAR D] -"\uA779" => "D" - -# D [FULLWIDTH LATIN CAPITAL LETTER D] -"\uFF24" => "D" - -# ð [LATIN SMALL LETTER ETH] -"\u00F0" => "d" - -# ď [LATIN SMALL LETTER D WITH CARON] -"\u010F" => "d" - -# đ [LATIN SMALL LETTER D WITH STROKE] -"\u0111" => "d" - -# ƌ [LATIN SMALL LETTER D WITH TOPBAR] -"\u018C" => "d" - -# ȡ [LATIN SMALL LETTER D WITH CURL] -"\u0221" => "d" - -# ɖ [LATIN SMALL LETTER D WITH TAIL] -"\u0256" => "d" - -# ɗ [LATIN SMALL LETTER D WITH HOOK] -"\u0257" => "d" - -# ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE] -"\u1D6D" => "d" - -# ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK] -"\u1D81" => "d" - -# ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL] -"\u1D91" => "d" - -# ḋ [LATIN SMALL LETTER D WITH DOT ABOVE] -"\u1E0B" => "d" - -# ḍ [LATIN SMALL LETTER D WITH DOT BELOW] -"\u1E0D" => "d" - -# ḏ [LATIN SMALL LETTER D WITH LINE BELOW] -"\u1E0F" => "d" - -# ḑ [LATIN SMALL LETTER D WITH CEDILLA] -"\u1E11" => "d" - -# ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] -"\u1E13" => "d" - -# ⓓ [CIRCLED LATIN SMALL LETTER D] -"\u24D3" => "d" - -# ꝺ [LATIN SMALL LETTER INSULAR D] -"\uA77A" => "d" - -# d [FULLWIDTH LATIN SMALL LETTER D] -"\uFF44" => "d" - -# DŽ [LATIN CAPITAL LETTER DZ WITH CARON] -"\u01C4" => "DZ" - -# DZ [LATIN CAPITAL LETTER DZ] -"\u01F1" => "DZ" - -# Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] -"\u01C5" => "Dz" - -# Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] -"\u01F2" => "Dz" - -# ⒟ [PARENTHESIZED LATIN SMALL LETTER D] -"\u249F" => "(d)" - -# ȸ [LATIN SMALL LETTER DB DIGRAPH] -"\u0238" => "db" - -# dž [LATIN SMALL LETTER DZ WITH CARON] -"\u01C6" => 
"dz" - -# dz [LATIN SMALL LETTER DZ] -"\u01F3" => "dz" - -# ʣ [LATIN SMALL LETTER DZ DIGRAPH] -"\u02A3" => "dz" - -# ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] -"\u02A5" => "dz" - -# È [LATIN CAPITAL LETTER E WITH GRAVE] -"\u00C8" => "E" - -# É [LATIN CAPITAL LETTER E WITH ACUTE] -"\u00C9" => "E" - -# Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] -"\u00CA" => "E" - -# Ë [LATIN CAPITAL LETTER E WITH DIAERESIS] -"\u00CB" => "E" - -# Ē [LATIN CAPITAL LETTER E WITH MACRON] -"\u0112" => "E" - -# Ĕ [LATIN CAPITAL LETTER E WITH BREVE] -"\u0114" => "E" - -# Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE] -"\u0116" => "E" - -# Ę [LATIN CAPITAL LETTER E WITH OGONEK] -"\u0118" => "E" - -# Ě [LATIN CAPITAL LETTER E WITH CARON] -"\u011A" => "E" - -# Ǝ [LATIN CAPITAL LETTER REVERSED E] -"\u018E" => "E" - -# Ɛ [LATIN CAPITAL LETTER OPEN E] -"\u0190" => "E" - -# Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] -"\u0204" => "E" - -# Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE] -"\u0206" => "E" - -# Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA] -"\u0228" => "E" - -# Ɇ [LATIN CAPITAL LETTER E WITH STROKE] -"\u0246" => "E" - -# ᴇ [LATIN LETTER SMALL CAPITAL E] -"\u1D07" => "E" - -# Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] -"\u1E14" => "E" - -# Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] -"\u1E16" => "E" - -# Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] -"\u1E18" => "E" - -# Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW] -"\u1E1A" => "E" - -# Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] -"\u1E1C" => "E" - -# Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW] -"\u1EB8" => "E" - -# Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE] -"\u1EBA" => "E" - -# Ẽ [LATIN CAPITAL LETTER E WITH TILDE] -"\u1EBC" => "E" - -# Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] -"\u1EBE" => "E" - -# Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] -"\u1EC0" => "E" - -# Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EC2" => "E" - -# Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] -"\u1EC4" 
=> "E" - -# Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] -"\u1EC6" => "E" - -# Ⓔ [CIRCLED LATIN CAPITAL LETTER E] -"\u24BA" => "E" - -# ⱻ [LATIN LETTER SMALL CAPITAL TURNED E] -"\u2C7B" => "E" - -# E [FULLWIDTH LATIN CAPITAL LETTER E] -"\uFF25" => "E" - -# è [LATIN SMALL LETTER E WITH GRAVE] -"\u00E8" => "e" - -# é [LATIN SMALL LETTER E WITH ACUTE] -"\u00E9" => "e" - -# ê [LATIN SMALL LETTER E WITH CIRCUMFLEX] -"\u00EA" => "e" - -# ë [LATIN SMALL LETTER E WITH DIAERESIS] -"\u00EB" => "e" - -# ē [LATIN SMALL LETTER E WITH MACRON] -"\u0113" => "e" - -# ĕ [LATIN SMALL LETTER E WITH BREVE] -"\u0115" => "e" - -# ė [LATIN SMALL LETTER E WITH DOT ABOVE] -"\u0117" => "e" - -# ę [LATIN SMALL LETTER E WITH OGONEK] -"\u0119" => "e" - -# ě [LATIN SMALL LETTER E WITH CARON] -"\u011B" => "e" - -# ǝ [LATIN SMALL LETTER TURNED E] -"\u01DD" => "e" - -# ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE] -"\u0205" => "e" - -# ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE] -"\u0207" => "e" - -# ȩ [LATIN SMALL LETTER E WITH CEDILLA] -"\u0229" => "e" - -# ɇ [LATIN SMALL LETTER E WITH STROKE] -"\u0247" => "e" - -# ɘ [LATIN SMALL LETTER REVERSED E] -"\u0258" => "e" - -# ɛ [LATIN SMALL LETTER OPEN E] -"\u025B" => "e" - -# ɜ [LATIN SMALL LETTER REVERSED OPEN E] -"\u025C" => "e" - -# ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] -"\u025D" => "e" - -# ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E] -"\u025E" => "e" - -# ʚ [LATIN SMALL LETTER CLOSED OPEN E] -"\u029A" => "e" - -# ᴈ [LATIN SMALL LETTER TURNED OPEN E] -"\u1D08" => "e" - -# ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK] -"\u1D92" => "e" - -# ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] -"\u1D93" => "e" - -# ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] -"\u1D94" => "e" - -# ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE] -"\u1E15" => "e" - -# ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE] -"\u1E17" => "e" - -# ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] -"\u1E19" => "e" - -# ḛ [LATIN SMALL LETTER E WITH TILDE 
BELOW] -"\u1E1B" => "e" - -# ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] -"\u1E1D" => "e" - -# ẹ [LATIN SMALL LETTER E WITH DOT BELOW] -"\u1EB9" => "e" - -# ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE] -"\u1EBB" => "e" - -# ẽ [LATIN SMALL LETTER E WITH TILDE] -"\u1EBD" => "e" - -# ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] -"\u1EBF" => "e" - -# ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] -"\u1EC1" => "e" - -# ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1EC3" => "e" - -# ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] -"\u1EC5" => "e" - -# ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] -"\u1EC7" => "e" - -# ₑ [LATIN SUBSCRIPT SMALL LETTER E] -"\u2091" => "e" - -# ⓔ [CIRCLED LATIN SMALL LETTER E] -"\u24D4" => "e" - -# ⱸ [LATIN SMALL LETTER E WITH NOTCH] -"\u2C78" => "e" - -# e [FULLWIDTH LATIN SMALL LETTER E] -"\uFF45" => "e" - -# ⒠ [PARENTHESIZED LATIN SMALL LETTER E] -"\u24A0" => "(e)" - -# Ƒ [LATIN CAPITAL LETTER F WITH HOOK] -"\u0191" => "F" - -# Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE] -"\u1E1E" => "F" - -# Ⓕ [CIRCLED LATIN CAPITAL LETTER F] -"\u24BB" => "F" - -# ꜰ [LATIN LETTER SMALL CAPITAL F] -"\uA730" => "F" - -# Ꝼ [LATIN CAPITAL LETTER INSULAR F] -"\uA77B" => "F" - -# ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F] -"\uA7FB" => "F" - -# F [FULLWIDTH LATIN CAPITAL LETTER F] -"\uFF26" => "F" - -# ƒ [LATIN SMALL LETTER F WITH HOOK] -"\u0192" => "f" - -# ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE] -"\u1D6E" => "f" - -# ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK] -"\u1D82" => "f" - -# ḟ [LATIN SMALL LETTER F WITH DOT ABOVE] -"\u1E1F" => "f" - -# ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE] -"\u1E9B" => "f" - -# ⓕ [CIRCLED LATIN SMALL LETTER F] -"\u24D5" => "f" - -# ꝼ [LATIN SMALL LETTER INSULAR F] -"\uA77C" => "f" - -# f [FULLWIDTH LATIN SMALL LETTER F] -"\uFF46" => "f" - -# ⒡ [PARENTHESIZED LATIN SMALL LETTER F] -"\u24A1" => "(f)" - -# ff [LATIN SMALL LIGATURE FF] -"\uFB00" => "ff" - -# ffi [LATIN SMALL LIGATURE FFI] -"\uFB03" => 
"ffi" - -# ffl [LATIN SMALL LIGATURE FFL] -"\uFB04" => "ffl" - -# fi [LATIN SMALL LIGATURE FI] -"\uFB01" => "fi" - -# fl [LATIN SMALL LIGATURE FL] -"\uFB02" => "fl" - -# Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] -"\u011C" => "G" - -# Ğ [LATIN CAPITAL LETTER G WITH BREVE] -"\u011E" => "G" - -# Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE] -"\u0120" => "G" - -# Ģ [LATIN CAPITAL LETTER G WITH CEDILLA] -"\u0122" => "G" - -# Ɠ [LATIN CAPITAL LETTER G WITH HOOK] -"\u0193" => "G" - -# Ǥ [LATIN CAPITAL LETTER G WITH STROKE] -"\u01E4" => "G" - -# ǥ [LATIN SMALL LETTER G WITH STROKE] -"\u01E5" => "G" - -# Ǧ [LATIN CAPITAL LETTER G WITH CARON] -"\u01E6" => "G" - -# ǧ [LATIN SMALL LETTER G WITH CARON] -"\u01E7" => "G" - -# Ǵ [LATIN CAPITAL LETTER G WITH ACUTE] -"\u01F4" => "G" - -# ɢ [LATIN LETTER SMALL CAPITAL G] -"\u0262" => "G" - -# ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK] -"\u029B" => "G" - -# Ḡ [LATIN CAPITAL LETTER G WITH MACRON] -"\u1E20" => "G" - -# Ⓖ [CIRCLED LATIN CAPITAL LETTER G] -"\u24BC" => "G" - -# Ᵹ [LATIN CAPITAL LETTER INSULAR G] -"\uA77D" => "G" - -# Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G] -"\uA77E" => "G" - -# G [FULLWIDTH LATIN CAPITAL LETTER G] -"\uFF27" => "G" - -# ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX] -"\u011D" => "g" - -# ğ [LATIN SMALL LETTER G WITH BREVE] -"\u011F" => "g" - -# ġ [LATIN SMALL LETTER G WITH DOT ABOVE] -"\u0121" => "g" - -# ģ [LATIN SMALL LETTER G WITH CEDILLA] -"\u0123" => "g" - -# ǵ [LATIN SMALL LETTER G WITH ACUTE] -"\u01F5" => "g" - -# ɠ [LATIN SMALL LETTER G WITH HOOK] -"\u0260" => "g" - -# ɡ [LATIN SMALL LETTER SCRIPT G] -"\u0261" => "g" - -# ᵷ [LATIN SMALL LETTER TURNED G] -"\u1D77" => "g" - -# ᵹ [LATIN SMALL LETTER INSULAR G] -"\u1D79" => "g" - -# ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK] -"\u1D83" => "g" - -# ḡ [LATIN SMALL LETTER G WITH MACRON] -"\u1E21" => "g" - -# ⓖ [CIRCLED LATIN SMALL LETTER G] -"\u24D6" => "g" - -# ꝿ [LATIN SMALL LETTER TURNED INSULAR G] -"\uA77F" => "g" - -# g [FULLWIDTH LATIN SMALL LETTER G] 
-"\uFF47" => "g" - -# ⒢ [PARENTHESIZED LATIN SMALL LETTER G] -"\u24A2" => "(g)" - -# Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] -"\u0124" => "H" - -# Ħ [LATIN CAPITAL LETTER H WITH STROKE] -"\u0126" => "H" - -# Ȟ [LATIN CAPITAL LETTER H WITH CARON] -"\u021E" => "H" - -# ʜ [LATIN LETTER SMALL CAPITAL H] -"\u029C" => "H" - -# Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE] -"\u1E22" => "H" - -# Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW] -"\u1E24" => "H" - -# Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS] -"\u1E26" => "H" - -# Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA] -"\u1E28" => "H" - -# Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW] -"\u1E2A" => "H" - -# Ⓗ [CIRCLED LATIN CAPITAL LETTER H] -"\u24BD" => "H" - -# Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER] -"\u2C67" => "H" - -# Ⱶ [LATIN CAPITAL LETTER HALF H] -"\u2C75" => "H" - -# H [FULLWIDTH LATIN CAPITAL LETTER H] -"\uFF28" => "H" - -# ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX] -"\u0125" => "h" - -# ħ [LATIN SMALL LETTER H WITH STROKE] -"\u0127" => "h" - -# ȟ [LATIN SMALL LETTER H WITH CARON] -"\u021F" => "h" - -# ɥ [LATIN SMALL LETTER TURNED H] -"\u0265" => "h" - -# ɦ [LATIN SMALL LETTER H WITH HOOK] -"\u0266" => "h" - -# ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK] -"\u02AE" => "h" - -# ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] -"\u02AF" => "h" - -# ḣ [LATIN SMALL LETTER H WITH DOT ABOVE] -"\u1E23" => "h" - -# ḥ [LATIN SMALL LETTER H WITH DOT BELOW] -"\u1E25" => "h" - -# ḧ [LATIN SMALL LETTER H WITH DIAERESIS] -"\u1E27" => "h" - -# ḩ [LATIN SMALL LETTER H WITH CEDILLA] -"\u1E29" => "h" - -# ḫ [LATIN SMALL LETTER H WITH BREVE BELOW] -"\u1E2B" => "h" - -# ẖ [LATIN SMALL LETTER H WITH LINE BELOW] -"\u1E96" => "h" - -# ⓗ [CIRCLED LATIN SMALL LETTER H] -"\u24D7" => "h" - -# ⱨ [LATIN SMALL LETTER H WITH DESCENDER] -"\u2C68" => "h" - -# ⱶ [LATIN SMALL LETTER HALF H] -"\u2C76" => "h" - -# h [FULLWIDTH LATIN SMALL LETTER H] -"\uFF48" => "h" - -# Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR] -"\u01F6" => "HV" 
- -# ⒣ [PARENTHESIZED LATIN SMALL LETTER H] -"\u24A3" => "(h)" - -# ƕ [LATIN SMALL LETTER HV] -"\u0195" => "hv" - -# Ì [LATIN CAPITAL LETTER I WITH GRAVE] -"\u00CC" => "I" - -# Í [LATIN CAPITAL LETTER I WITH ACUTE] -"\u00CD" => "I" - -# Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] -"\u00CE" => "I" - -# Ï [LATIN CAPITAL LETTER I WITH DIAERESIS] -"\u00CF" => "I" - -# Ĩ [LATIN CAPITAL LETTER I WITH TILDE] -"\u0128" => "I" - -# Ī [LATIN CAPITAL LETTER I WITH MACRON] -"\u012A" => "I" - -# Ĭ [LATIN CAPITAL LETTER I WITH BREVE] -"\u012C" => "I" - -# Į [LATIN CAPITAL LETTER I WITH OGONEK] -"\u012E" => "I" - -# İ [LATIN CAPITAL LETTER I WITH DOT ABOVE] -"\u0130" => "I" - -# Ɩ [LATIN CAPITAL LETTER IOTA] -"\u0196" => "I" - -# Ɨ [LATIN CAPITAL LETTER I WITH STROKE] -"\u0197" => "I" - -# Ǐ [LATIN CAPITAL LETTER I WITH CARON] -"\u01CF" => "I" - -# Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] -"\u0208" => "I" - -# Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE] -"\u020A" => "I" - -# ɪ [LATIN LETTER SMALL CAPITAL I] -"\u026A" => "I" - -# ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE] -"\u1D7B" => "I" - -# Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW] -"\u1E2C" => "I" - -# Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] -"\u1E2E" => "I" - -# Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE] -"\u1EC8" => "I" - -# Ị [LATIN CAPITAL LETTER I WITH DOT BELOW] -"\u1ECA" => "I" - -# Ⓘ [CIRCLED LATIN CAPITAL LETTER I] -"\u24BE" => "I" - -# ꟾ [LATIN EPIGRAPHIC LETTER I LONGA] -"\uA7FE" => "I" - -# I [FULLWIDTH LATIN CAPITAL LETTER I] -"\uFF29" => "I" - -# ì [LATIN SMALL LETTER I WITH GRAVE] -"\u00EC" => "i" - -# í [LATIN SMALL LETTER I WITH ACUTE] -"\u00ED" => "i" - -# î [LATIN SMALL LETTER I WITH CIRCUMFLEX] -"\u00EE" => "i" - -# ï [LATIN SMALL LETTER I WITH DIAERESIS] -"\u00EF" => "i" - -# ĩ [LATIN SMALL LETTER I WITH TILDE] -"\u0129" => "i" - -# ī [LATIN SMALL LETTER I WITH MACRON] -"\u012B" => "i" - -# ĭ [LATIN SMALL LETTER I WITH BREVE] -"\u012D" => "i" - -# į [LATIN SMALL LETTER I WITH OGONEK] 
-"\u012F" => "i" - -# ı [LATIN SMALL LETTER DOTLESS I] -"\u0131" => "i" - -# ǐ [LATIN SMALL LETTER I WITH CARON] -"\u01D0" => "i" - -# ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE] -"\u0209" => "i" - -# ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE] -"\u020B" => "i" - -# ɨ [LATIN SMALL LETTER I WITH STROKE] -"\u0268" => "i" - -# ᴉ [LATIN SMALL LETTER TURNED I] -"\u1D09" => "i" - -# ᵢ [LATIN SUBSCRIPT SMALL LETTER I] -"\u1D62" => "i" - -# ᵼ [LATIN SMALL LETTER IOTA WITH STROKE] -"\u1D7C" => "i" - -# ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK] -"\u1D96" => "i" - -# ḭ [LATIN SMALL LETTER I WITH TILDE BELOW] -"\u1E2D" => "i" - -# ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] -"\u1E2F" => "i" - -# ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE] -"\u1EC9" => "i" - -# ị [LATIN SMALL LETTER I WITH DOT BELOW] -"\u1ECB" => "i" - -# ⁱ [SUPERSCRIPT LATIN SMALL LETTER I] -"\u2071" => "i" - -# ⓘ [CIRCLED LATIN SMALL LETTER I] -"\u24D8" => "i" - -# i [FULLWIDTH LATIN SMALL LETTER I] -"\uFF49" => "i" - -# IJ [LATIN CAPITAL LIGATURE IJ] -"\u0132" => "IJ" - -# ⒤ [PARENTHESIZED LATIN SMALL LETTER I] -"\u24A4" => "(i)" - -# ij [LATIN SMALL LIGATURE IJ] -"\u0133" => "ij" - -# Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] -"\u0134" => "J" - -# Ɉ [LATIN CAPITAL LETTER J WITH STROKE] -"\u0248" => "J" - -# ᴊ [LATIN LETTER SMALL CAPITAL J] -"\u1D0A" => "J" - -# Ⓙ [CIRCLED LATIN CAPITAL LETTER J] -"\u24BF" => "J" - -# J [FULLWIDTH LATIN CAPITAL LETTER J] -"\uFF2A" => "J" - -# ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX] -"\u0135" => "j" - -# ǰ [LATIN SMALL LETTER J WITH CARON] -"\u01F0" => "j" - -# ȷ [LATIN SMALL LETTER DOTLESS J] -"\u0237" => "j" - -# ɉ [LATIN SMALL LETTER J WITH STROKE] -"\u0249" => "j" - -# ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE] -"\u025F" => "j" - -# ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] -"\u0284" => "j" - -# ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL] -"\u029D" => "j" - -# ⓙ [CIRCLED LATIN SMALL LETTER J] -"\u24D9" => "j" - -# ⱼ [LATIN SUBSCRIPT SMALL LETTER J] 
-"\u2C7C" => "j" - -# j [FULLWIDTH LATIN SMALL LETTER J] -"\uFF4A" => "j" - -# ⒥ [PARENTHESIZED LATIN SMALL LETTER J] -"\u24A5" => "(j)" - -# Ķ [LATIN CAPITAL LETTER K WITH CEDILLA] -"\u0136" => "K" - -# Ƙ [LATIN CAPITAL LETTER K WITH HOOK] -"\u0198" => "K" - -# Ǩ [LATIN CAPITAL LETTER K WITH CARON] -"\u01E8" => "K" - -# ᴋ [LATIN LETTER SMALL CAPITAL K] -"\u1D0B" => "K" - -# Ḱ [LATIN CAPITAL LETTER K WITH ACUTE] -"\u1E30" => "K" - -# Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW] -"\u1E32" => "K" - -# Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW] -"\u1E34" => "K" - -# Ⓚ [CIRCLED LATIN CAPITAL LETTER K] -"\u24C0" => "K" - -# Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER] -"\u2C69" => "K" - -# Ꝁ [LATIN CAPITAL LETTER K WITH STROKE] -"\uA740" => "K" - -# Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] -"\uA742" => "K" - -# Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] -"\uA744" => "K" - -# K [FULLWIDTH LATIN CAPITAL LETTER K] -"\uFF2B" => "K" - -# ķ [LATIN SMALL LETTER K WITH CEDILLA] -"\u0137" => "k" - -# ƙ [LATIN SMALL LETTER K WITH HOOK] -"\u0199" => "k" - -# ǩ [LATIN SMALL LETTER K WITH CARON] -"\u01E9" => "k" - -# ʞ [LATIN SMALL LETTER TURNED K] -"\u029E" => "k" - -# ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK] -"\u1D84" => "k" - -# ḱ [LATIN SMALL LETTER K WITH ACUTE] -"\u1E31" => "k" - -# ḳ [LATIN SMALL LETTER K WITH DOT BELOW] -"\u1E33" => "k" - -# ḵ [LATIN SMALL LETTER K WITH LINE BELOW] -"\u1E35" => "k" - -# ⓚ [CIRCLED LATIN SMALL LETTER K] -"\u24DA" => "k" - -# ⱪ [LATIN SMALL LETTER K WITH DESCENDER] -"\u2C6A" => "k" - -# ꝁ [LATIN SMALL LETTER K WITH STROKE] -"\uA741" => "k" - -# ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE] -"\uA743" => "k" - -# ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] -"\uA745" => "k" - -# k [FULLWIDTH LATIN SMALL LETTER K] -"\uFF4B" => "k" - -# ⒦ [PARENTHESIZED LATIN SMALL LETTER K] -"\u24A6" => "(k)" - -# Ĺ [LATIN CAPITAL LETTER L WITH ACUTE] -"\u0139" => "L" - -# Ļ [LATIN CAPITAL LETTER L WITH CEDILLA] -"\u013B" => "L" - 
-# Ľ [LATIN CAPITAL LETTER L WITH CARON] -"\u013D" => "L" - -# Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT] -"\u013F" => "L" - -# Ł [LATIN CAPITAL LETTER L WITH STROKE] -"\u0141" => "L" - -# Ƚ [LATIN CAPITAL LETTER L WITH BAR] -"\u023D" => "L" - -# ʟ [LATIN LETTER SMALL CAPITAL L] -"\u029F" => "L" - -# ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE] -"\u1D0C" => "L" - -# Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW] -"\u1E36" => "L" - -# Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] -"\u1E38" => "L" - -# Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW] -"\u1E3A" => "L" - -# Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] -"\u1E3C" => "L" - -# Ⓛ [CIRCLED LATIN CAPITAL LETTER L] -"\u24C1" => "L" - -# Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR] -"\u2C60" => "L" - -# Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] -"\u2C62" => "L" - -# Ꝇ [LATIN CAPITAL LETTER BROKEN L] -"\uA746" => "L" - -# Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE] -"\uA748" => "L" - -# Ꞁ [LATIN CAPITAL LETTER TURNED L] -"\uA780" => "L" - -# L [FULLWIDTH LATIN CAPITAL LETTER L] -"\uFF2C" => "L" - -# ĺ [LATIN SMALL LETTER L WITH ACUTE] -"\u013A" => "l" - -# ļ [LATIN SMALL LETTER L WITH CEDILLA] -"\u013C" => "l" - -# ľ [LATIN SMALL LETTER L WITH CARON] -"\u013E" => "l" - -# ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT] -"\u0140" => "l" - -# ł [LATIN SMALL LETTER L WITH STROKE] -"\u0142" => "l" - -# ƚ [LATIN SMALL LETTER L WITH BAR] -"\u019A" => "l" - -# ȴ [LATIN SMALL LETTER L WITH CURL] -"\u0234" => "l" - -# ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE] -"\u026B" => "l" - -# ɬ [LATIN SMALL LETTER L WITH BELT] -"\u026C" => "l" - -# ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK] -"\u026D" => "l" - -# ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK] -"\u1D85" => "l" - -# ḷ [LATIN SMALL LETTER L WITH DOT BELOW] -"\u1E37" => "l" - -# ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] -"\u1E39" => "l" - -# ḻ [LATIN SMALL LETTER L WITH LINE BELOW] -"\u1E3B" => "l" - -# ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] -"\u1E3D" => "l" - 
-# ⓛ [CIRCLED LATIN SMALL LETTER L] -"\u24DB" => "l" - -# ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR] -"\u2C61" => "l" - -# ꝇ [LATIN SMALL LETTER BROKEN L] -"\uA747" => "l" - -# ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE] -"\uA749" => "l" - -# ꞁ [LATIN SMALL LETTER TURNED L] -"\uA781" => "l" - -# l [FULLWIDTH LATIN SMALL LETTER L] -"\uFF4C" => "l" - -# LJ [LATIN CAPITAL LETTER LJ] -"\u01C7" => "LJ" - -# Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL] -"\u1EFA" => "LL" - -# Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J] -"\u01C8" => "Lj" - -# ⒧ [PARENTHESIZED LATIN SMALL LETTER L] -"\u24A7" => "(l)" - -# lj [LATIN SMALL LETTER LJ] -"\u01C9" => "lj" - -# ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL] -"\u1EFB" => "ll" - -# ʪ [LATIN SMALL LETTER LS DIGRAPH] -"\u02AA" => "ls" - -# ʫ [LATIN SMALL LETTER LZ DIGRAPH] -"\u02AB" => "lz" - -# Ɯ [LATIN CAPITAL LETTER TURNED M] -"\u019C" => "M" - -# ᴍ [LATIN LETTER SMALL CAPITAL M] -"\u1D0D" => "M" - -# Ḿ [LATIN CAPITAL LETTER M WITH ACUTE] -"\u1E3E" => "M" - -# Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE] -"\u1E40" => "M" - -# Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW] -"\u1E42" => "M" - -# Ⓜ [CIRCLED LATIN CAPITAL LETTER M] -"\u24C2" => "M" - -# Ɱ [LATIN CAPITAL LETTER M WITH HOOK] -"\u2C6E" => "M" - -# ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M] -"\uA7FD" => "M" - -# ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M] -"\uA7FF" => "M" - -# M [FULLWIDTH LATIN CAPITAL LETTER M] -"\uFF2D" => "M" - -# ɯ [LATIN SMALL LETTER TURNED M] -"\u026F" => "m" - -# ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG] -"\u0270" => "m" - -# ɱ [LATIN SMALL LETTER M WITH HOOK] -"\u0271" => "m" - -# ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE] -"\u1D6F" => "m" - -# ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK] -"\u1D86" => "m" - -# ḿ [LATIN SMALL LETTER M WITH ACUTE] -"\u1E3F" => "m" - -# ṁ [LATIN SMALL LETTER M WITH DOT ABOVE] -"\u1E41" => "m" - -# ṃ [LATIN SMALL LETTER M WITH DOT BELOW] -"\u1E43" => "m" - -# ⓜ [CIRCLED LATIN SMALL LETTER M] -"\u24DC" => "m" - -# m [FULLWIDTH LATIN SMALL LETTER 
M] -"\uFF4D" => "m" - -# ⒨ [PARENTHESIZED LATIN SMALL LETTER M] -"\u24A8" => "(m)" - -# Ñ [LATIN CAPITAL LETTER N WITH TILDE] -"\u00D1" => "N" - -# Ń [LATIN CAPITAL LETTER N WITH ACUTE] -"\u0143" => "N" - -# Ņ [LATIN CAPITAL LETTER N WITH CEDILLA] -"\u0145" => "N" - -# Ň [LATIN CAPITAL LETTER N WITH CARON] -"\u0147" => "N" - -# Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG] -"\u014A" => "N" - -# Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK] -"\u019D" => "N" - -# Ǹ [LATIN CAPITAL LETTER N WITH GRAVE] -"\u01F8" => "N" - -# Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG] -"\u0220" => "N" - -# ɴ [LATIN LETTER SMALL CAPITAL N] -"\u0274" => "N" - -# ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N] -"\u1D0E" => "N" - -# Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE] -"\u1E44" => "N" - -# Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW] -"\u1E46" => "N" - -# Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW] -"\u1E48" => "N" - -# Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] -"\u1E4A" => "N" - -# Ⓝ [CIRCLED LATIN CAPITAL LETTER N] -"\u24C3" => "N" - -# N [FULLWIDTH LATIN CAPITAL LETTER N] -"\uFF2E" => "N" - -# ñ [LATIN SMALL LETTER N WITH TILDE] -"\u00F1" => "n" - -# ń [LATIN SMALL LETTER N WITH ACUTE] -"\u0144" => "n" - -# ņ [LATIN SMALL LETTER N WITH CEDILLA] -"\u0146" => "n" - -# ň [LATIN SMALL LETTER N WITH CARON] -"\u0148" => "n" - -# ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] -"\u0149" => "n" - -# ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG] -"\u014B" => "n" - -# ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG] -"\u019E" => "n" - -# ǹ [LATIN SMALL LETTER N WITH GRAVE] -"\u01F9" => "n" - -# ȵ [LATIN SMALL LETTER N WITH CURL] -"\u0235" => "n" - -# ɲ [LATIN SMALL LETTER N WITH LEFT HOOK] -"\u0272" => "n" - -# ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK] -"\u0273" => "n" - -# ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE] -"\u1D70" => "n" - -# ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK] -"\u1D87" => "n" - -# ṅ [LATIN SMALL LETTER N WITH DOT ABOVE] 
-"\u1E45" => "n" - -# ṇ [LATIN SMALL LETTER N WITH DOT BELOW] -"\u1E47" => "n" - -# ṉ [LATIN SMALL LETTER N WITH LINE BELOW] -"\u1E49" => "n" - -# ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW] -"\u1E4B" => "n" - -# ⁿ [SUPERSCRIPT LATIN SMALL LETTER N] -"\u207F" => "n" - -# ⓝ [CIRCLED LATIN SMALL LETTER N] -"\u24DD" => "n" - -# n [FULLWIDTH LATIN SMALL LETTER N] -"\uFF4E" => "n" - -# NJ [LATIN CAPITAL LETTER NJ] -"\u01CA" => "NJ" - -# Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J] -"\u01CB" => "Nj" - -# ⒩ [PARENTHESIZED LATIN SMALL LETTER N] -"\u24A9" => "(n)" - -# nj [LATIN SMALL LETTER NJ] -"\u01CC" => "nj" - -# Ò [LATIN CAPITAL LETTER O WITH GRAVE] -"\u00D2" => "O" - -# Ó [LATIN CAPITAL LETTER O WITH ACUTE] -"\u00D3" => "O" - -# Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX] -"\u00D4" => "O" - -# Õ [LATIN CAPITAL LETTER O WITH TILDE] -"\u00D5" => "O" - -# Ö [LATIN CAPITAL LETTER O WITH DIAERESIS] -"\u00D6" => "O" - -# Ø [LATIN CAPITAL LETTER O WITH STROKE] -"\u00D8" => "O" - -# Ō [LATIN CAPITAL LETTER O WITH MACRON] -"\u014C" => "O" - -# Ŏ [LATIN CAPITAL LETTER O WITH BREVE] -"\u014E" => "O" - -# Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] -"\u0150" => "O" - -# Ɔ [LATIN CAPITAL LETTER OPEN O] -"\u0186" => "O" - -# Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] -"\u019F" => "O" - -# Ơ [LATIN CAPITAL LETTER O WITH HORN] -"\u01A0" => "O" - -# Ǒ [LATIN CAPITAL LETTER O WITH CARON] -"\u01D1" => "O" - -# Ǫ [LATIN CAPITAL LETTER O WITH OGONEK] -"\u01EA" => "O" - -# Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] -"\u01EC" => "O" - -# Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] -"\u01FE" => "O" - -# Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] -"\u020C" => "O" - -# Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE] -"\u020E" => "O" - -# Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] -"\u022A" => "O" - -# Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] -"\u022C" => "O" - -# Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE] -"\u022E" => "O" - -# Ȱ [LATIN CAPITAL LETTER O 
WITH DOT ABOVE AND MACRON] -"\u0230" => "O" - -# ᴏ [LATIN LETTER SMALL CAPITAL O] -"\u1D0F" => "O" - -# ᴐ [LATIN LETTER SMALL CAPITAL OPEN O] -"\u1D10" => "O" - -# Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] -"\u1E4C" => "O" - -# Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] -"\u1E4E" => "O" - -# Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] -"\u1E50" => "O" - -# Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE] -"\u1E52" => "O" - -# Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW] -"\u1ECC" => "O" - -# Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE] -"\u1ECE" => "O" - -# Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE] -"\u1ED0" => "O" - -# Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] -"\u1ED2" => "O" - -# Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1ED4" => "O" - -# Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] -"\u1ED6" => "O" - -# Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] -"\u1ED8" => "O" - -# Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE] -"\u1EDA" => "O" - -# Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] -"\u1EDC" => "O" - -# Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] -"\u1EDE" => "O" - -# Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE] -"\u1EE0" => "O" - -# Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] -"\u1EE2" => "O" - -# Ⓞ [CIRCLED LATIN CAPITAL LETTER O] -"\u24C4" => "O" - -# Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] -"\uA74A" => "O" - -# Ꝍ [LATIN CAPITAL LETTER O WITH LOOP] -"\uA74C" => "O" - -# O [FULLWIDTH LATIN CAPITAL LETTER O] -"\uFF2F" => "O" - -# ò [LATIN SMALL LETTER O WITH GRAVE] -"\u00F2" => "o" - -# ó [LATIN SMALL LETTER O WITH ACUTE] -"\u00F3" => "o" - -# ô [LATIN SMALL LETTER O WITH CIRCUMFLEX] -"\u00F4" => "o" - -# õ [LATIN SMALL LETTER O WITH TILDE] -"\u00F5" => "o" - -# ö [LATIN SMALL LETTER O WITH DIAERESIS] -"\u00F6" => "o" - -# ø [LATIN SMALL LETTER O WITH STROKE] -"\u00F8" => "o" - -# ō [LATIN SMALL LETTER O WITH MACRON] -"\u014D" => "o" - -# ŏ [LATIN SMALL 
LETTER O WITH BREVE] -"\u014F" => "o" - -# ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE] -"\u0151" => "o" - -# ơ [LATIN SMALL LETTER O WITH HORN] -"\u01A1" => "o" - -# ǒ [LATIN SMALL LETTER O WITH CARON] -"\u01D2" => "o" - -# ǫ [LATIN SMALL LETTER O WITH OGONEK] -"\u01EB" => "o" - -# ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON] -"\u01ED" => "o" - -# ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE] -"\u01FF" => "o" - -# ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE] -"\u020D" => "o" - -# ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE] -"\u020F" => "o" - -# ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] -"\u022B" => "o" - -# ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON] -"\u022D" => "o" - -# ȯ [LATIN SMALL LETTER O WITH DOT ABOVE] -"\u022F" => "o" - -# ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON] -"\u0231" => "o" - -# ɔ [LATIN SMALL LETTER OPEN O] -"\u0254" => "o" - -# ɵ [LATIN SMALL LETTER BARRED O] -"\u0275" => "o" - -# ᴖ [LATIN SMALL LETTER TOP HALF O] -"\u1D16" => "o" - -# ᴗ [LATIN SMALL LETTER BOTTOM HALF O] -"\u1D17" => "o" - -# ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] -"\u1D97" => "o" - -# ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE] -"\u1E4D" => "o" - -# ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] -"\u1E4F" => "o" - -# ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE] -"\u1E51" => "o" - -# ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE] -"\u1E53" => "o" - -# ọ [LATIN SMALL LETTER O WITH DOT BELOW] -"\u1ECD" => "o" - -# ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE] -"\u1ECF" => "o" - -# ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] -"\u1ED1" => "o" - -# ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] -"\u1ED3" => "o" - -# ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] -"\u1ED5" => "o" - -# ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] -"\u1ED7" => "o" - -# ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW] -"\u1ED9" => "o" - -# ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE] -"\u1EDB" => "o" - -# ờ [LATIN SMALL LETTER O WITH 
HORN AND GRAVE] -"\u1EDD" => "o" - -# ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] -"\u1EDF" => "o" - -# ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE] -"\u1EE1" => "o" - -# ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] -"\u1EE3" => "o" - -# ₒ [LATIN SUBSCRIPT SMALL LETTER O] -"\u2092" => "o" - -# ⓞ [CIRCLED LATIN SMALL LETTER O] -"\u24DE" => "o" - -# ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE] -"\u2C7A" => "o" - -# ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] -"\uA74B" => "o" - -# ꝍ [LATIN SMALL LETTER O WITH LOOP] -"\uA74D" => "o" - -# o [FULLWIDTH LATIN SMALL LETTER O] -"\uFF4F" => "o" - -# Œ [LATIN CAPITAL LIGATURE OE] -"\u0152" => "OE" - -# ɶ [LATIN LETTER SMALL CAPITAL OE] -"\u0276" => "OE" - -# Ꝏ [LATIN CAPITAL LETTER OO] -"\uA74E" => "OO" - -# Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU] -"\u0222" => "OU" - -# ᴕ [LATIN LETTER SMALL CAPITAL OU] -"\u1D15" => "OU" - -# ⒪ [PARENTHESIZED LATIN SMALL LETTER O] -"\u24AA" => "(o)" - -# œ [LATIN SMALL LIGATURE OE] -"\u0153" => "oe" - -# ᴔ [LATIN SMALL LETTER TURNED OE] -"\u1D14" => "oe" - -# ꝏ [LATIN SMALL LETTER OO] -"\uA74F" => "oo" - -# ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU] -"\u0223" => "ou" - -# Ƥ [LATIN CAPITAL LETTER P WITH HOOK] -"\u01A4" => "P" - -# ᴘ [LATIN LETTER SMALL CAPITAL P] -"\u1D18" => "P" - -# Ṕ [LATIN CAPITAL LETTER P WITH ACUTE] -"\u1E54" => "P" - -# Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE] -"\u1E56" => "P" - -# Ⓟ [CIRCLED LATIN CAPITAL LETTER P] -"\u24C5" => "P" - -# Ᵽ [LATIN CAPITAL LETTER P WITH STROKE] -"\u2C63" => "P" - -# Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] -"\uA750" => "P" - -# Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH] -"\uA752" => "P" - -# Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] -"\uA754" => "P" - -# P [FULLWIDTH LATIN CAPITAL LETTER P] -"\uFF30" => "P" - -# ƥ [LATIN SMALL LETTER P WITH HOOK] -"\u01A5" => "p" - -# ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE] -"\u1D71" => "p" - -# ᵽ [LATIN SMALL LETTER P WITH STROKE] 
-"\u1D7D" => "p" - -# ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK] -"\u1D88" => "p" - -# ṕ [LATIN SMALL LETTER P WITH ACUTE] -"\u1E55" => "p" - -# ṗ [LATIN SMALL LETTER P WITH DOT ABOVE] -"\u1E57" => "p" - -# ⓟ [CIRCLED LATIN SMALL LETTER P] -"\u24DF" => "p" - -# ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] -"\uA751" => "p" - -# ꝓ [LATIN SMALL LETTER P WITH FLOURISH] -"\uA753" => "p" - -# ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL] -"\uA755" => "p" - -# ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P] -"\uA7FC" => "p" - -# p [FULLWIDTH LATIN SMALL LETTER P] -"\uFF50" => "p" - -# ⒫ [PARENTHESIZED LATIN SMALL LETTER P] -"\u24AB" => "(p)" - -# Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] -"\u024A" => "Q" - -# Ⓠ [CIRCLED LATIN CAPITAL LETTER Q] -"\u24C6" => "Q" - -# Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] -"\uA756" => "Q" - -# Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] -"\uA758" => "Q" - -# Q [FULLWIDTH LATIN CAPITAL LETTER Q] -"\uFF31" => "Q" - -# ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA] -"\u0138" => "q" - -# ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL] -"\u024B" => "q" - -# ʠ [LATIN SMALL LETTER Q WITH HOOK] -"\u02A0" => "q" - -# ⓠ [CIRCLED LATIN SMALL LETTER Q] -"\u24E0" => "q" - -# ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] -"\uA757" => "q" - -# ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] -"\uA759" => "q" - -# q [FULLWIDTH LATIN SMALL LETTER Q] -"\uFF51" => "q" - -# ⒬ [PARENTHESIZED LATIN SMALL LETTER Q] -"\u24AC" => "(q)" - -# ȹ [LATIN SMALL LETTER QP DIGRAPH] -"\u0239" => "qp" - -# Ŕ [LATIN CAPITAL LETTER R WITH ACUTE] -"\u0154" => "R" - -# Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA] -"\u0156" => "R" - -# Ř [LATIN CAPITAL LETTER R WITH CARON] -"\u0158" => "R" - -# Ȓ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] -"\u0210" => "R" - -# Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE] -"\u0212" => "R" - -# Ɍ [LATIN CAPITAL LETTER R WITH STROKE] -"\u024C" => "R" - -# ʀ [LATIN LETTER SMALL CAPITAL R] -"\u0280" => "R" - 
-# ʁ [LATIN LETTER SMALL CAPITAL INVERTED R] -"\u0281" => "R" - -# ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R] -"\u1D19" => "R" - -# ᴚ [LATIN LETTER SMALL CAPITAL TURNED R] -"\u1D1A" => "R" - -# Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE] -"\u1E58" => "R" - -# Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW] -"\u1E5A" => "R" - -# Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] -"\u1E5C" => "R" - -# Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW] -"\u1E5E" => "R" - -# Ⓡ [CIRCLED LATIN CAPITAL LETTER R] -"\u24C7" => "R" - -# Ɽ [LATIN CAPITAL LETTER R WITH TAIL] -"\u2C64" => "R" - -# Ꝛ [LATIN CAPITAL LETTER R ROTUNDA] -"\uA75A" => "R" - -# Ꞃ [LATIN CAPITAL LETTER INSULAR R] -"\uA782" => "R" - -# R [FULLWIDTH LATIN CAPITAL LETTER R] -"\uFF32" => "R" - -# ŕ [LATIN SMALL LETTER R WITH ACUTE] -"\u0155" => "r" - -# ŗ [LATIN SMALL LETTER R WITH CEDILLA] -"\u0157" => "r" - -# ř [LATIN SMALL LETTER R WITH CARON] -"\u0159" => "r" - -# ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE] -"\u0211" => "r" - -# ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE] -"\u0213" => "r" - -# ɍ [LATIN SMALL LETTER R WITH STROKE] -"\u024D" => "r" - -# ɼ [LATIN SMALL LETTER R WITH LONG LEG] -"\u027C" => "r" - -# ɽ [LATIN SMALL LETTER R WITH TAIL] -"\u027D" => "r" - -# ɾ [LATIN SMALL LETTER R WITH FISHHOOK] -"\u027E" => "r" - -# ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] -"\u027F" => "r" - -# ᵣ [LATIN SUBSCRIPT SMALL LETTER R] -"\u1D63" => "r" - -# ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE] -"\u1D72" => "r" - -# ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] -"\u1D73" => "r" - -# ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK] -"\u1D89" => "r" - -# ṙ [LATIN SMALL LETTER R WITH DOT ABOVE] -"\u1E59" => "r" - -# ṛ [LATIN SMALL LETTER R WITH DOT BELOW] -"\u1E5B" => "r" - -# ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON] -"\u1E5D" => "r" - -# ṟ [LATIN SMALL LETTER R WITH LINE BELOW] -"\u1E5F" => "r" - -# ⓡ [CIRCLED LATIN SMALL LETTER R] -"\u24E1" => "r" - -# ꝛ [LATIN SMALL LETTER R ROTUNDA] -"\uA75B" => "r" - 
-# ꞃ [LATIN SMALL LETTER INSULAR R] -"\uA783" => "r" - -# r [FULLWIDTH LATIN SMALL LETTER R] -"\uFF52" => "r" - -# ⒭ [PARENTHESIZED LATIN SMALL LETTER R] -"\u24AD" => "(r)" - -# Ś [LATIN CAPITAL LETTER S WITH ACUTE] -"\u015A" => "S" - -# Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] -"\u015C" => "S" - -# Ş [LATIN CAPITAL LETTER S WITH CEDILLA] -"\u015E" => "S" - -# Š [LATIN CAPITAL LETTER S WITH CARON] -"\u0160" => "S" - -# Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW] -"\u0218" => "S" - -# Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE] -"\u1E60" => "S" - -# Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW] -"\u1E62" => "S" - -# Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] -"\u1E64" => "S" - -# Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] -"\u1E66" => "S" - -# Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] -"\u1E68" => "S" - -# Ⓢ [CIRCLED LATIN CAPITAL LETTER S] -"\u24C8" => "S" - -# ꜱ [LATIN LETTER SMALL CAPITAL S] -"\uA731" => "S" - -# ꞅ [LATIN SMALL LETTER INSULAR S] -"\uA785" => "S" - -# S [FULLWIDTH LATIN CAPITAL LETTER S] -"\uFF33" => "S" - -# ś [LATIN SMALL LETTER S WITH ACUTE] -"\u015B" => "s" - -# ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX] -"\u015D" => "s" - -# ş [LATIN SMALL LETTER S WITH CEDILLA] -"\u015F" => "s" - -# š [LATIN SMALL LETTER S WITH CARON] -"\u0161" => "s" - -# ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S] -"\u017F" => "s" - -# ș [LATIN SMALL LETTER S WITH COMMA BELOW] -"\u0219" => "s" - -# ȿ [LATIN SMALL LETTER S WITH SWASH TAIL] -"\u023F" => "s" - -# ʂ [LATIN SMALL LETTER S WITH HOOK] -"\u0282" => "s" - -# ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE] -"\u1D74" => "s" - -# ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK] -"\u1D8A" => "s" - -# ṡ [LATIN SMALL LETTER S WITH DOT ABOVE] -"\u1E61" => "s" - -# ṣ [LATIN SMALL LETTER S WITH DOT BELOW] -"\u1E63" => "s" - -# ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] -"\u1E65" => "s" - -# ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE] -"\u1E67" => "s" - -# ṩ [LATIN SMALL 
LETTER S WITH DOT BELOW AND DOT ABOVE] -"\u1E69" => "s" - -# ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE] -"\u1E9C" => "s" - -# ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE] -"\u1E9D" => "s" - -# ⓢ [CIRCLED LATIN SMALL LETTER S] -"\u24E2" => "s" - -# Ꞅ [LATIN CAPITAL LETTER INSULAR S] -"\uA784" => "s" - -# s [FULLWIDTH LATIN SMALL LETTER S] -"\uFF53" => "s" - -# ẞ [LATIN CAPITAL LETTER SHARP S] -"\u1E9E" => "SS" - -# ⒮ [PARENTHESIZED LATIN SMALL LETTER S] -"\u24AE" => "(s)" - -# ß [LATIN SMALL LETTER SHARP S] -"\u00DF" => "ss" - -# st [LATIN SMALL LIGATURE ST] -"\uFB06" => "st" - -# Ţ [LATIN CAPITAL LETTER T WITH CEDILLA] -"\u0162" => "T" - -# Ť [LATIN CAPITAL LETTER T WITH CARON] -"\u0164" => "T" - -# Ŧ [LATIN CAPITAL LETTER T WITH STROKE] -"\u0166" => "T" - -# Ƭ [LATIN CAPITAL LETTER T WITH HOOK] -"\u01AC" => "T" - -# Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] -"\u01AE" => "T" - -# Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW] -"\u021A" => "T" - -# Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] -"\u023E" => "T" - -# ᴛ [LATIN LETTER SMALL CAPITAL T] -"\u1D1B" => "T" - -# Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE] -"\u1E6A" => "T" - -# Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW] -"\u1E6C" => "T" - -# Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW] -"\u1E6E" => "T" - -# Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] -"\u1E70" => "T" - -# Ⓣ [CIRCLED LATIN CAPITAL LETTER T] -"\u24C9" => "T" - -# Ꞇ [LATIN CAPITAL LETTER INSULAR T] -"\uA786" => "T" - -# T [FULLWIDTH LATIN CAPITAL LETTER T] -"\uFF34" => "T" - -# ţ [LATIN SMALL LETTER T WITH CEDILLA] -"\u0163" => "t" - -# ť [LATIN SMALL LETTER T WITH CARON] -"\u0165" => "t" - -# ŧ [LATIN SMALL LETTER T WITH STROKE] -"\u0167" => "t" - -# ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK] -"\u01AB" => "t" - -# ƭ [LATIN SMALL LETTER T WITH HOOK] -"\u01AD" => "t" - -# ț [LATIN SMALL LETTER T WITH COMMA BELOW] -"\u021B" => "t" - -# ȶ [LATIN SMALL LETTER T WITH CURL] -"\u0236" => "t" - -# ʇ [LATIN SMALL LETTER TURNED T] -"\u0287" 
=> "t" - -# ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK] -"\u0288" => "t" - -# ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE] -"\u1D75" => "t" - -# ṫ [LATIN SMALL LETTER T WITH DOT ABOVE] -"\u1E6B" => "t" - -# ṭ [LATIN SMALL LETTER T WITH DOT BELOW] -"\u1E6D" => "t" - -# ṯ [LATIN SMALL LETTER T WITH LINE BELOW] -"\u1E6F" => "t" - -# ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW] -"\u1E71" => "t" - -# ẗ [LATIN SMALL LETTER T WITH DIAERESIS] -"\u1E97" => "t" - -# ⓣ [CIRCLED LATIN SMALL LETTER T] -"\u24E3" => "t" - -# ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE] -"\u2C66" => "t" - -# t [FULLWIDTH LATIN SMALL LETTER T] -"\uFF54" => "t" - -# Þ [LATIN CAPITAL LETTER THORN] -"\u00DE" => "TH" - -# Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] -"\uA766" => "TH" - -# Ꜩ [LATIN CAPITAL LETTER TZ] -"\uA728" => "TZ" - -# ⒯ [PARENTHESIZED LATIN SMALL LETTER T] -"\u24AF" => "(t)" - -# ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL] -"\u02A8" => "tc" - -# þ [LATIN SMALL LETTER THORN] -"\u00FE" => "th" - -# ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH] -"\u1D7A" => "th" - -# ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] -"\uA767" => "th" - -# ʦ [LATIN SMALL LETTER TS DIGRAPH] -"\u02A6" => "ts" - -# ꜩ [LATIN SMALL LETTER TZ] -"\uA729" => "tz" - -# Ù [LATIN CAPITAL LETTER U WITH GRAVE] -"\u00D9" => "U" - -# Ú [LATIN CAPITAL LETTER U WITH ACUTE] -"\u00DA" => "U" - -# Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] -"\u00DB" => "U" - -# Ü [LATIN CAPITAL LETTER U WITH DIAERESIS] -"\u00DC" => "U" - -# Ũ [LATIN CAPITAL LETTER U WITH TILDE] -"\u0168" => "U" - -# Ū [LATIN CAPITAL LETTER U WITH MACRON] -"\u016A" => "U" - -# Ŭ [LATIN CAPITAL LETTER U WITH BREVE] -"\u016C" => "U" - -# Ů [LATIN CAPITAL LETTER U WITH RING ABOVE] -"\u016E" => "U" - -# Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] -"\u0170" => "U" - -# Ų [LATIN CAPITAL LETTER U WITH OGONEK] -"\u0172" => "U" - -# Ư [LATIN CAPITAL LETTER U WITH HORN] -"\u01AF" => "U" - -# Ǔ [LATIN CAPITAL LETTER U WITH CARON] -"\u01D3" 
=> "U" - -# Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON] -"\u01D5" => "U" - -# Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] -"\u01D7" => "U" - -# Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] -"\u01D9" => "U" - -# Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] -"\u01DB" => "U" - -# Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE] -"\u0214" => "U" - -# Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE] -"\u0216" => "U" - -# Ʉ [LATIN CAPITAL LETTER U BAR] -"\u0244" => "U" - -# ᴜ [LATIN LETTER SMALL CAPITAL U] -"\u1D1C" => "U" - -# ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE] -"\u1D7E" => "U" - -# Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW] -"\u1E72" => "U" - -# Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW] -"\u1E74" => "U" - -# Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW] -"\u1E76" => "U" - -# Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] -"\u1E78" => "U" - -# Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] -"\u1E7A" => "U" - -# Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW] -"\u1EE4" => "U" - -# Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE] -"\u1EE6" => "U" - -# Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] -"\u1EE8" => "U" - -# Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] -"\u1EEA" => "U" - -# Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] -"\u1EEC" => "U" - -# Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE] -"\u1EEE" => "U" - -# Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] -"\u1EF0" => "U" - -# Ⓤ [CIRCLED LATIN CAPITAL LETTER U] -"\u24CA" => "U" - -# U [FULLWIDTH LATIN CAPITAL LETTER U] -"\uFF35" => "U" - -# ù [LATIN SMALL LETTER U WITH GRAVE] -"\u00F9" => "u" - -# ú [LATIN SMALL LETTER U WITH ACUTE] -"\u00FA" => "u" - -# û [LATIN SMALL LETTER U WITH CIRCUMFLEX] -"\u00FB" => "u" - -# ü [LATIN SMALL LETTER U WITH DIAERESIS] -"\u00FC" => "u" - -# ũ [LATIN SMALL LETTER U WITH TILDE] -"\u0169" => "u" - -# ū [LATIN SMALL LETTER U WITH MACRON] -"\u016B" => "u" - -# ŭ [LATIN SMALL LETTER U WITH BREVE] -"\u016D" => "u" - -# ů [LATIN 
SMALL LETTER U WITH RING ABOVE] -"\u016F" => "u" - -# ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE] -"\u0171" => "u" - -# ų [LATIN SMALL LETTER U WITH OGONEK] -"\u0173" => "u" - -# ư [LATIN SMALL LETTER U WITH HORN] -"\u01B0" => "u" - -# ǔ [LATIN SMALL LETTER U WITH CARON] -"\u01D4" => "u" - -# ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON] -"\u01D6" => "u" - -# ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] -"\u01D8" => "u" - -# ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] -"\u01DA" => "u" - -# ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] -"\u01DC" => "u" - -# ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE] -"\u0215" => "u" - -# ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE] -"\u0217" => "u" - -# ʉ [LATIN SMALL LETTER U BAR] -"\u0289" => "u" - -# ᵤ [LATIN SUBSCRIPT SMALL LETTER U] -"\u1D64" => "u" - -# ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK] -"\u1D99" => "u" - -# ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW] -"\u1E73" => "u" - -# ṵ [LATIN SMALL LETTER U WITH TILDE BELOW] -"\u1E75" => "u" - -# ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] -"\u1E77" => "u" - -# ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE] -"\u1E79" => "u" - -# ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] -"\u1E7B" => "u" - -# ụ [LATIN SMALL LETTER U WITH DOT BELOW] -"\u1EE5" => "u" - -# ủ [LATIN SMALL LETTER U WITH HOOK ABOVE] -"\u1EE7" => "u" - -# ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE] -"\u1EE9" => "u" - -# ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE] -"\u1EEB" => "u" - -# ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] -"\u1EED" => "u" - -# ữ [LATIN SMALL LETTER U WITH HORN AND TILDE] -"\u1EEF" => "u" - -# ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] -"\u1EF1" => "u" - -# ⓤ [CIRCLED LATIN SMALL LETTER U] -"\u24E4" => "u" - -# u [FULLWIDTH LATIN SMALL LETTER U] -"\uFF55" => "u" - -# ⒰ [PARENTHESIZED LATIN SMALL LETTER U] -"\u24B0" => "(u)" - -# ᵫ [LATIN SMALL LETTER UE] -"\u1D6B" => "ue" - -# Ʋ [LATIN CAPITAL LETTER V WITH HOOK] -"\u01B2" => "V" - -# Ʌ [LATIN CAPITAL 
LETTER TURNED V] -"\u0245" => "V" - -# ᴠ [LATIN LETTER SMALL CAPITAL V] -"\u1D20" => "V" - -# Ṽ [LATIN CAPITAL LETTER V WITH TILDE] -"\u1E7C" => "V" - -# Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW] -"\u1E7E" => "V" - -# Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V] -"\u1EFC" => "V" - -# Ⓥ [CIRCLED LATIN CAPITAL LETTER V] -"\u24CB" => "V" - -# Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] -"\uA75E" => "V" - -# Ꝩ [LATIN CAPITAL LETTER VEND] -"\uA768" => "V" - -# V [FULLWIDTH LATIN CAPITAL LETTER V] -"\uFF36" => "V" - -# ʋ [LATIN SMALL LETTER V WITH HOOK] -"\u028B" => "v" - -# ʌ [LATIN SMALL LETTER TURNED V] -"\u028C" => "v" - -# ᵥ [LATIN SUBSCRIPT SMALL LETTER V] -"\u1D65" => "v" - -# ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK] -"\u1D8C" => "v" - -# ṽ [LATIN SMALL LETTER V WITH TILDE] -"\u1E7D" => "v" - -# ṿ [LATIN SMALL LETTER V WITH DOT BELOW] -"\u1E7F" => "v" - -# ⓥ [CIRCLED LATIN SMALL LETTER V] -"\u24E5" => "v" - -# ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK] -"\u2C71" => "v" - -# ⱴ [LATIN SMALL LETTER V WITH CURL] -"\u2C74" => "v" - -# ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE] -"\uA75F" => "v" - -# v [FULLWIDTH LATIN SMALL LETTER V] -"\uFF56" => "v" - -# Ꝡ [LATIN CAPITAL LETTER VY] -"\uA760" => "VY" - -# ⒱ [PARENTHESIZED LATIN SMALL LETTER V] -"\u24B1" => "(v)" - -# ꝡ [LATIN SMALL LETTER VY] -"\uA761" => "vy" - -# Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX] -"\u0174" => "W" - -# Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN] -"\u01F7" => "W" - -# ᴡ [LATIN LETTER SMALL CAPITAL W] -"\u1D21" => "W" - -# Ẁ [LATIN CAPITAL LETTER W WITH GRAVE] -"\u1E80" => "W" - -# Ẃ [LATIN CAPITAL LETTER W WITH ACUTE] -"\u1E82" => "W" - -# Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS] -"\u1E84" => "W" - -# Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE] -"\u1E86" => "W" - -# Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW] -"\u1E88" => "W" - -# Ⓦ [CIRCLED LATIN CAPITAL LETTER W] -"\u24CC" => "W" - -# Ⱳ [LATIN CAPITAL LETTER W WITH HOOK] -"\u2C72" => "W" - -# W [FULLWIDTH LATIN CAPITAL 
LETTER W] -"\uFF37" => "W" - -# ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX] -"\u0175" => "w" - -# ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN] -"\u01BF" => "w" - -# ʍ [LATIN SMALL LETTER TURNED W] -"\u028D" => "w" - -# ẁ [LATIN SMALL LETTER W WITH GRAVE] -"\u1E81" => "w" - -# ẃ [LATIN SMALL LETTER W WITH ACUTE] -"\u1E83" => "w" - -# ẅ [LATIN SMALL LETTER W WITH DIAERESIS] -"\u1E85" => "w" - -# ẇ [LATIN SMALL LETTER W WITH DOT ABOVE] -"\u1E87" => "w" - -# ẉ [LATIN SMALL LETTER W WITH DOT BELOW] -"\u1E89" => "w" - -# ẘ [LATIN SMALL LETTER W WITH RING ABOVE] -"\u1E98" => "w" - -# ⓦ [CIRCLED LATIN SMALL LETTER W] -"\u24E6" => "w" - -# ⱳ [LATIN SMALL LETTER W WITH HOOK] -"\u2C73" => "w" - -# w [FULLWIDTH LATIN SMALL LETTER W] -"\uFF57" => "w" - -# ⒲ [PARENTHESIZED LATIN SMALL LETTER W] -"\u24B2" => "(w)" - -# Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE] -"\u1E8A" => "X" - -# Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS] -"\u1E8C" => "X" - -# Ⓧ [CIRCLED LATIN CAPITAL LETTER X] -"\u24CD" => "X" - -# X [FULLWIDTH LATIN CAPITAL LETTER X] -"\uFF38" => "X" - -# ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK] -"\u1D8D" => "x" - -# ẋ [LATIN SMALL LETTER X WITH DOT ABOVE] -"\u1E8B" => "x" - -# ẍ [LATIN SMALL LETTER X WITH DIAERESIS] -"\u1E8D" => "x" - -# ₓ [LATIN SUBSCRIPT SMALL LETTER X] -"\u2093" => "x" - -# ⓧ [CIRCLED LATIN SMALL LETTER X] -"\u24E7" => "x" - -# x [FULLWIDTH LATIN SMALL LETTER X] -"\uFF58" => "x" - -# ⒳ [PARENTHESIZED LATIN SMALL LETTER X] -"\u24B3" => "(x)" - -# Ý [LATIN CAPITAL LETTER Y WITH ACUTE] -"\u00DD" => "Y" - -# Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] -"\u0176" => "Y" - -# Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS] -"\u0178" => "Y" - -# Ƴ [LATIN CAPITAL LETTER Y WITH HOOK] -"\u01B3" => "Y" - -# Ȳ [LATIN CAPITAL LETTER Y WITH MACRON] -"\u0232" => "Y" - -# Ɏ [LATIN CAPITAL LETTER Y WITH STROKE] -"\u024E" => "Y" - -# ʏ [LATIN LETTER SMALL CAPITAL Y] -"\u028F" => "Y" - -# Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE] -"\u1E8E" => "Y" - -# Ỳ [LATIN CAPITAL 
LETTER Y WITH GRAVE] -"\u1EF2" => "Y" - -# Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW] -"\u1EF4" => "Y" - -# Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE] -"\u1EF6" => "Y" - -# Ỹ [LATIN CAPITAL LETTER Y WITH TILDE] -"\u1EF8" => "Y" - -# Ỿ [LATIN CAPITAL LETTER Y WITH LOOP] -"\u1EFE" => "Y" - -# Ⓨ [CIRCLED LATIN CAPITAL LETTER Y] -"\u24CE" => "Y" - -# Y [FULLWIDTH LATIN CAPITAL LETTER Y] -"\uFF39" => "Y" - -# ý [LATIN SMALL LETTER Y WITH ACUTE] -"\u00FD" => "y" - -# ÿ [LATIN SMALL LETTER Y WITH DIAERESIS] -"\u00FF" => "y" - -# ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX] -"\u0177" => "y" - -# ƴ [LATIN SMALL LETTER Y WITH HOOK] -"\u01B4" => "y" - -# ȳ [LATIN SMALL LETTER Y WITH MACRON] -"\u0233" => "y" - -# ɏ [LATIN SMALL LETTER Y WITH STROKE] -"\u024F" => "y" - -# ʎ [LATIN SMALL LETTER TURNED Y] -"\u028E" => "y" - -# ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE] -"\u1E8F" => "y" - -# ẙ [LATIN SMALL LETTER Y WITH RING ABOVE] -"\u1E99" => "y" - -# ỳ [LATIN SMALL LETTER Y WITH GRAVE] -"\u1EF3" => "y" - -# ỵ [LATIN SMALL LETTER Y WITH DOT BELOW] -"\u1EF5" => "y" - -# ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE] -"\u1EF7" => "y" - -# ỹ [LATIN SMALL LETTER Y WITH TILDE] -"\u1EF9" => "y" - -# ỿ [LATIN SMALL LETTER Y WITH LOOP] -"\u1EFF" => "y" - -# ⓨ [CIRCLED LATIN SMALL LETTER Y] -"\u24E8" => "y" - -# y [FULLWIDTH LATIN SMALL LETTER Y] -"\uFF59" => "y" - -# ⒴ [PARENTHESIZED LATIN SMALL LETTER Y] -"\u24B4" => "(y)" - -# Ź [LATIN CAPITAL LETTER Z WITH ACUTE] -"\u0179" => "Z" - -# Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE] -"\u017B" => "Z" - -# Ž [LATIN CAPITAL LETTER Z WITH CARON] -"\u017D" => "Z" - -# Ƶ [LATIN CAPITAL LETTER Z WITH STROKE] -"\u01B5" => "Z" - -# Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH] -"\u021C" => "Z" - -# Ȥ [LATIN CAPITAL LETTER Z WITH HOOK] -"\u0224" => "Z" - -# ᴢ [LATIN LETTER SMALL CAPITAL Z] -"\u1D22" => "Z" - -# Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] -"\u1E90" => "Z" - -# Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW] -"\u1E92" => "Z" - -# Ẕ [LATIN 
CAPITAL LETTER Z WITH LINE BELOW] -"\u1E94" => "Z" - -# Ⓩ [CIRCLED LATIN CAPITAL LETTER Z] -"\u24CF" => "Z" - -# Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER] -"\u2C6B" => "Z" - -# Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z] -"\uA762" => "Z" - -# Z [FULLWIDTH LATIN CAPITAL LETTER Z] -"\uFF3A" => "Z" - -# ź [LATIN SMALL LETTER Z WITH ACUTE] -"\u017A" => "z" - -# ż [LATIN SMALL LETTER Z WITH DOT ABOVE] -"\u017C" => "z" - -# ž [LATIN SMALL LETTER Z WITH CARON] -"\u017E" => "z" - -# ƶ [LATIN SMALL LETTER Z WITH STROKE] -"\u01B6" => "z" - -# ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH] -"\u021D" => "z" - -# ȥ [LATIN SMALL LETTER Z WITH HOOK] -"\u0225" => "z" - -# ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL] -"\u0240" => "z" - -# ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] -"\u0290" => "z" - -# ʑ [LATIN SMALL LETTER Z WITH CURL] -"\u0291" => "z" - -# ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE] -"\u1D76" => "z" - -# ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK] -"\u1D8E" => "z" - -# ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX] -"\u1E91" => "z" - -# ẓ [LATIN SMALL LETTER Z WITH DOT BELOW] -"\u1E93" => "z" - -# ẕ [LATIN SMALL LETTER Z WITH LINE BELOW] -"\u1E95" => "z" - -# ⓩ [CIRCLED LATIN SMALL LETTER Z] -"\u24E9" => "z" - -# ⱬ [LATIN SMALL LETTER Z WITH DESCENDER] -"\u2C6C" => "z" - -# ꝣ [LATIN SMALL LETTER VISIGOTHIC Z] -"\uA763" => "z" - -# z [FULLWIDTH LATIN SMALL LETTER Z] -"\uFF5A" => "z" - -# ⒵ [PARENTHESIZED LATIN SMALL LETTER Z] -"\u24B5" => "(z)" - -# ⁰ [SUPERSCRIPT ZERO] -"\u2070" => "0" - -# ₀ [SUBSCRIPT ZERO] -"\u2080" => "0" - -# ⓪ [CIRCLED DIGIT ZERO] -"\u24EA" => "0" - -# ⓿ [NEGATIVE CIRCLED DIGIT ZERO] -"\u24FF" => "0" - -# 0 [FULLWIDTH DIGIT ZERO] -"\uFF10" => "0" - -# ¹ [SUPERSCRIPT ONE] -"\u00B9" => "1" - -# ₁ [SUBSCRIPT ONE] -"\u2081" => "1" - -# ① [CIRCLED DIGIT ONE] -"\u2460" => "1" - -# ⓵ [DOUBLE CIRCLED DIGIT ONE] -"\u24F5" => "1" - -# ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE] -"\u2776" => "1" - -# ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] -"\u2780" => "1" 
- -# ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] -"\u278A" => "1" - -# 1 [FULLWIDTH DIGIT ONE] -"\uFF11" => "1" - -# ⒈ [DIGIT ONE FULL STOP] -"\u2488" => "1." - -# ⑴ [PARENTHESIZED DIGIT ONE] -"\u2474" => "(1)" - -# ² [SUPERSCRIPT TWO] -"\u00B2" => "2" - -# ₂ [SUBSCRIPT TWO] -"\u2082" => "2" - -# ② [CIRCLED DIGIT TWO] -"\u2461" => "2" - -# ⓶ [DOUBLE CIRCLED DIGIT TWO] -"\u24F6" => "2" - -# ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO] -"\u2777" => "2" - -# ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] -"\u2781" => "2" - -# ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] -"\u278B" => "2" - -# 2 [FULLWIDTH DIGIT TWO] -"\uFF12" => "2" - -# ⒉ [DIGIT TWO FULL STOP] -"\u2489" => "2." - -# ⑵ [PARENTHESIZED DIGIT TWO] -"\u2475" => "(2)" - -# ³ [SUPERSCRIPT THREE] -"\u00B3" => "3" - -# ₃ [SUBSCRIPT THREE] -"\u2083" => "3" - -# ③ [CIRCLED DIGIT THREE] -"\u2462" => "3" - -# ⓷ [DOUBLE CIRCLED DIGIT THREE] -"\u24F7" => "3" - -# ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE] -"\u2778" => "3" - -# ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] -"\u2782" => "3" - -# ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] -"\u278C" => "3" - -# 3 [FULLWIDTH DIGIT THREE] -"\uFF13" => "3" - -# ⒊ [DIGIT THREE FULL STOP] -"\u248A" => "3." - -# ⑶ [PARENTHESIZED DIGIT THREE] -"\u2476" => "(3)" - -# ⁴ [SUPERSCRIPT FOUR] -"\u2074" => "4" - -# ₄ [SUBSCRIPT FOUR] -"\u2084" => "4" - -# ④ [CIRCLED DIGIT FOUR] -"\u2463" => "4" - -# ⓸ [DOUBLE CIRCLED DIGIT FOUR] -"\u24F8" => "4" - -# ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] -"\u2779" => "4" - -# ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] -"\u2783" => "4" - -# ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] -"\u278D" => "4" - -# 4 [FULLWIDTH DIGIT FOUR] -"\uFF14" => "4" - -# ⒋ [DIGIT FOUR FULL STOP] -"\u248B" => "4." 
- -# ⑷ [PARENTHESIZED DIGIT FOUR] -"\u2477" => "(4)" - -# ⁵ [SUPERSCRIPT FIVE] -"\u2075" => "5" - -# ₅ [SUBSCRIPT FIVE] -"\u2085" => "5" - -# ⑤ [CIRCLED DIGIT FIVE] -"\u2464" => "5" - -# ⓹ [DOUBLE CIRCLED DIGIT FIVE] -"\u24F9" => "5" - -# ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] -"\u277A" => "5" - -# ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] -"\u2784" => "5" - -# ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] -"\u278E" => "5" - -# 5 [FULLWIDTH DIGIT FIVE] -"\uFF15" => "5" - -# ⒌ [DIGIT FIVE FULL STOP] -"\u248C" => "5." - -# ⑸ [PARENTHESIZED DIGIT FIVE] -"\u2478" => "(5)" - -# ⁶ [SUPERSCRIPT SIX] -"\u2076" => "6" - -# ₆ [SUBSCRIPT SIX] -"\u2086" => "6" - -# ⑥ [CIRCLED DIGIT SIX] -"\u2465" => "6" - -# ⓺ [DOUBLE CIRCLED DIGIT SIX] -"\u24FA" => "6" - -# ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX] -"\u277B" => "6" - -# ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] -"\u2785" => "6" - -# ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] -"\u278F" => "6" - -# 6 [FULLWIDTH DIGIT SIX] -"\uFF16" => "6" - -# ⒍ [DIGIT SIX FULL STOP] -"\u248D" => "6." - -# ⑹ [PARENTHESIZED DIGIT SIX] -"\u2479" => "(6)" - -# ⁷ [SUPERSCRIPT SEVEN] -"\u2077" => "7" - -# ₇ [SUBSCRIPT SEVEN] -"\u2087" => "7" - -# ⑦ [CIRCLED DIGIT SEVEN] -"\u2466" => "7" - -# ⓻ [DOUBLE CIRCLED DIGIT SEVEN] -"\u24FB" => "7" - -# ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] -"\u277C" => "7" - -# ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] -"\u2786" => "7" - -# ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN] -"\u2790" => "7" - -# 7 [FULLWIDTH DIGIT SEVEN] -"\uFF17" => "7" - -# ⒎ [DIGIT SEVEN FULL STOP] -"\u248E" => "7." 
- -# ⑺ [PARENTHESIZED DIGIT SEVEN] -"\u247A" => "(7)" - -# ⁸ [SUPERSCRIPT EIGHT] -"\u2078" => "8" - -# ₈ [SUBSCRIPT EIGHT] -"\u2088" => "8" - -# ⑧ [CIRCLED DIGIT EIGHT] -"\u2467" => "8" - -# ⓼ [DOUBLE CIRCLED DIGIT EIGHT] -"\u24FC" => "8" - -# ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] -"\u277D" => "8" - -# ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] -"\u2787" => "8" - -# ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] -"\u2791" => "8" - -# 8 [FULLWIDTH DIGIT EIGHT] -"\uFF18" => "8" - -# ⒏ [DIGIT EIGHT FULL STOP] -"\u248F" => "8." - -# ⑻ [PARENTHESIZED DIGIT EIGHT] -"\u247B" => "(8)" - -# ⁹ [SUPERSCRIPT NINE] -"\u2079" => "9" - -# ₉ [SUBSCRIPT NINE] -"\u2089" => "9" - -# ⑨ [CIRCLED DIGIT NINE] -"\u2468" => "9" - -# ⓽ [DOUBLE CIRCLED DIGIT NINE] -"\u24FD" => "9" - -# ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE] -"\u277E" => "9" - -# ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] -"\u2788" => "9" - -# ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] -"\u2792" => "9" - -# 9 [FULLWIDTH DIGIT NINE] -"\uFF19" => "9" - -# ⒐ [DIGIT NINE FULL STOP] -"\u2490" => "9." - -# ⑼ [PARENTHESIZED DIGIT NINE] -"\u247C" => "(9)" - -# ⑩ [CIRCLED NUMBER TEN] -"\u2469" => "10" - -# ⓾ [DOUBLE CIRCLED NUMBER TEN] -"\u24FE" => "10" - -# ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN] -"\u277F" => "10" - -# ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] -"\u2789" => "10" - -# ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] -"\u2793" => "10" - -# ⒑ [NUMBER TEN FULL STOP] -"\u2491" => "10." - -# ⑽ [PARENTHESIZED NUMBER TEN] -"\u247D" => "(10)" - -# ⑪ [CIRCLED NUMBER ELEVEN] -"\u246A" => "11" - -# ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN] -"\u24EB" => "11" - -# ⒒ [NUMBER ELEVEN FULL STOP] -"\u2492" => "11." - -# ⑾ [PARENTHESIZED NUMBER ELEVEN] -"\u247E" => "(11)" - -# ⑫ [CIRCLED NUMBER TWELVE] -"\u246B" => "12" - -# ⓬ [NEGATIVE CIRCLED NUMBER TWELVE] -"\u24EC" => "12" - -# ⒓ [NUMBER TWELVE FULL STOP] -"\u2493" => "12." 
- -# ⑿ [PARENTHESIZED NUMBER TWELVE] -"\u247F" => "(12)" - -# ⑬ [CIRCLED NUMBER THIRTEEN] -"\u246C" => "13" - -# ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN] -"\u24ED" => "13" - -# ⒔ [NUMBER THIRTEEN FULL STOP] -"\u2494" => "13." - -# ⒀ [PARENTHESIZED NUMBER THIRTEEN] -"\u2480" => "(13)" - -# ⑭ [CIRCLED NUMBER FOURTEEN] -"\u246D" => "14" - -# ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN] -"\u24EE" => "14" - -# ⒕ [NUMBER FOURTEEN FULL STOP] -"\u2495" => "14." - -# ⒁ [PARENTHESIZED NUMBER FOURTEEN] -"\u2481" => "(14)" - -# ⑮ [CIRCLED NUMBER FIFTEEN] -"\u246E" => "15" - -# ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN] -"\u24EF" => "15" - -# ⒖ [NUMBER FIFTEEN FULL STOP] -"\u2496" => "15." - -# ⒂ [PARENTHESIZED NUMBER FIFTEEN] -"\u2482" => "(15)" - -# ⑯ [CIRCLED NUMBER SIXTEEN] -"\u246F" => "16" - -# ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN] -"\u24F0" => "16" - -# ⒗ [NUMBER SIXTEEN FULL STOP] -"\u2497" => "16." - -# ⒃ [PARENTHESIZED NUMBER SIXTEEN] -"\u2483" => "(16)" - -# ⑰ [CIRCLED NUMBER SEVENTEEN] -"\u2470" => "17" - -# ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN] -"\u24F1" => "17" - -# ⒘ [NUMBER SEVENTEEN FULL STOP] -"\u2498" => "17." - -# ⒄ [PARENTHESIZED NUMBER SEVENTEEN] -"\u2484" => "(17)" - -# ⑱ [CIRCLED NUMBER EIGHTEEN] -"\u2471" => "18" - -# ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN] -"\u24F2" => "18" - -# ⒙ [NUMBER EIGHTEEN FULL STOP] -"\u2499" => "18." - -# ⒅ [PARENTHESIZED NUMBER EIGHTEEN] -"\u2485" => "(18)" - -# ⑲ [CIRCLED NUMBER NINETEEN] -"\u2472" => "19" - -# ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN] -"\u24F3" => "19" - -# ⒚ [NUMBER NINETEEN FULL STOP] -"\u249A" => "19." - -# ⒆ [PARENTHESIZED NUMBER NINETEEN] -"\u2486" => "(19)" - -# ⑳ [CIRCLED NUMBER TWENTY] -"\u2473" => "20" - -# ⓴ [NEGATIVE CIRCLED NUMBER TWENTY] -"\u24F4" => "20" - -# ⒛ [NUMBER TWENTY FULL STOP] -"\u249B" => "20." 
- -# ⒇ [PARENTHESIZED NUMBER TWENTY] -"\u2487" => "(20)" - -# « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] -"\u00AB" => "\"" - -# » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] -"\u00BB" => "\"" - -# “ [LEFT DOUBLE QUOTATION MARK] -"\u201C" => "\"" - -# ” [RIGHT DOUBLE QUOTATION MARK] -"\u201D" => "\"" - -# „ [DOUBLE LOW-9 QUOTATION MARK] -"\u201E" => "\"" - -# ″ [DOUBLE PRIME] -"\u2033" => "\"" - -# ‶ [REVERSED DOUBLE PRIME] -"\u2036" => "\"" - -# ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] -"\u275D" => "\"" - -# ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] -"\u275E" => "\"" - -# ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] -"\u276E" => "\"" - -# ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] -"\u276F" => "\"" - -# " [FULLWIDTH QUOTATION MARK] -"\uFF02" => "\"" - -# ‘ [LEFT SINGLE QUOTATION MARK] -"\u2018" => "\'" - -# ’ [RIGHT SINGLE QUOTATION MARK] -"\u2019" => "\'" - -# ‚ [SINGLE LOW-9 QUOTATION MARK] -"\u201A" => "\'" - -# ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK] -"\u201B" => "\'" - -# ′ [PRIME] -"\u2032" => "\'" - -# ‵ [REVERSED PRIME] -"\u2035" => "\'" - -# ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK] -"\u2039" => "\'" - -# › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] -"\u203A" => "\'" - -# ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT] -"\u275B" => "\'" - -# ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] -"\u275C" => "\'" - -# ' [FULLWIDTH APOSTROPHE] -"\uFF07" => "\'" - -# ‐ [HYPHEN] -"\u2010" => "-" - -# ‑ [NON-BREAKING HYPHEN] -"\u2011" => "-" - -# ‒ [FIGURE DASH] -"\u2012" => "-" - -# – [EN DASH] -"\u2013" => "-" - -# — [EM DASH] -"\u2014" => "-" - -# ⁻ [SUPERSCRIPT MINUS] -"\u207B" => "-" - -# ₋ [SUBSCRIPT MINUS] -"\u208B" => "-" - -# - [FULLWIDTH HYPHEN-MINUS] -"\uFF0D" => "-" - -# ⁅ [LEFT SQUARE BRACKET WITH QUILL] -"\u2045" => "[" - -# ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] -"\u2772" => "[" - -# [ [FULLWIDTH LEFT SQUARE BRACKET] -"\uFF3B" => "[" - -# ⁆ [RIGHT SQUARE BRACKET WITH QUILL] -"\u2046" => 
"]" - -# ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] -"\u2773" => "]" - -# ] [FULLWIDTH RIGHT SQUARE BRACKET] -"\uFF3D" => "]" - -# ⁽ [SUPERSCRIPT LEFT PARENTHESIS] -"\u207D" => "(" - -# ₍ [SUBSCRIPT LEFT PARENTHESIS] -"\u208D" => "(" - -# ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT] -"\u2768" => "(" - -# ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT] -"\u276A" => "(" - -# ( [FULLWIDTH LEFT PARENTHESIS] -"\uFF08" => "(" - -# ⸨ [LEFT DOUBLE PARENTHESIS] -"\u2E28" => "((" - -# ⁾ [SUPERSCRIPT RIGHT PARENTHESIS] -"\u207E" => ")" - -# ₎ [SUBSCRIPT RIGHT PARENTHESIS] -"\u208E" => ")" - -# ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT] -"\u2769" => ")" - -# ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] -"\u276B" => ")" - -# ) [FULLWIDTH RIGHT PARENTHESIS] -"\uFF09" => ")" - -# ⸩ [RIGHT DOUBLE PARENTHESIS] -"\u2E29" => "))" - -# ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] -"\u276C" => "<" - -# ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] -"\u2770" => "<" - -# < [FULLWIDTH LESS-THAN SIGN] -"\uFF1C" => "<" - -# ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT] -"\u276D" => ">" - -# ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] -"\u2771" => ">" - -# > [FULLWIDTH GREATER-THAN SIGN] -"\uFF1E" => ">" - -# ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT] -"\u2774" => "{" - -# { [FULLWIDTH LEFT CURLY BRACKET] -"\uFF5B" => "{" - -# ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT] -"\u2775" => "}" - -# } [FULLWIDTH RIGHT CURLY BRACKET] -"\uFF5D" => "}" - -# ⁺ [SUPERSCRIPT PLUS SIGN] -"\u207A" => "+" - -# ₊ [SUBSCRIPT PLUS SIGN] -"\u208A" => "+" - -# + [FULLWIDTH PLUS SIGN] -"\uFF0B" => "+" - -# ⁼ [SUPERSCRIPT EQUALS SIGN] -"\u207C" => "=" - -# ₌ [SUBSCRIPT EQUALS SIGN] -"\u208C" => "=" - -# = [FULLWIDTH EQUALS SIGN] -"\uFF1D" => "=" - -# ! [FULLWIDTH EXCLAMATION MARK] -"\uFF01" => "!" - -# ‼ [DOUBLE EXCLAMATION MARK] -"\u203C" => "!!" - -# ⁉ [EXCLAMATION QUESTION MARK] -"\u2049" => "!?" 
- -# # [FULLWIDTH NUMBER SIGN] -"\uFF03" => "#" - -# $ [FULLWIDTH DOLLAR SIGN] -"\uFF04" => "$" - -# ⁒ [COMMERCIAL MINUS SIGN] -"\u2052" => "%" - -# % [FULLWIDTH PERCENT SIGN] -"\uFF05" => "%" - -# & [FULLWIDTH AMPERSAND] -"\uFF06" => "&" - -# ⁎ [LOW ASTERISK] -"\u204E" => "*" - -# * [FULLWIDTH ASTERISK] -"\uFF0A" => "*" - -# , [FULLWIDTH COMMA] -"\uFF0C" => "," - -# . [FULLWIDTH FULL STOP] -"\uFF0E" => "." - -# ⁄ [FRACTION SLASH] -"\u2044" => "/" - -# / [FULLWIDTH SOLIDUS] -"\uFF0F" => "/" - -# : [FULLWIDTH COLON] -"\uFF1A" => ":" - -# ⁏ [REVERSED SEMICOLON] -"\u204F" => ";" - -# ; [FULLWIDTH SEMICOLON] -"\uFF1B" => ";" - -# ? [FULLWIDTH QUESTION MARK] -"\uFF1F" => "?" - -# ⁇ [DOUBLE QUESTION MARK] -"\u2047" => "??" - -# ⁈ [QUESTION EXCLAMATION MARK] -"\u2048" => "?!" - -# @ [FULLWIDTH COMMERCIAL AT] -"\uFF20" => "@" - -# \ [FULLWIDTH REVERSE SOLIDUS] -"\uFF3C" => "\\" - -# ‸ [CARET] -"\u2038" => "^" - -# ^ [FULLWIDTH CIRCUMFLEX ACCENT] -"\uFF3E" => "^" - -# _ [FULLWIDTH LOW LINE] -"\uFF3F" => "_" - -# ⁓ [SWUNG DASH] -"\u2053" => "~" - -# ~ [FULLWIDTH TILDE] -"\uFF5E" => "~" - -################################################################ -# Below is the Perl script used to generate the above mappings # -# from ASCIIFoldingFilter.java: # -################################################################ -# -# #!/usr/bin/perl -# -# use warnings; -# use strict; -# -# my @source_chars = (); -# my @source_char_descriptions = (); -# my $target = ''; -# -# while (<>) { -# if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) { -# push @source_chars, $1; -# push @source_char_descriptions, $2; -# next; -# } -# if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) { -# $target .= $1; -# next; -# } -# if (/break;/) { -# $target = "\\\"" if ($target eq '"'); -# for my $source_char_num (0..$#source_chars) { -# print "# $source_char_descriptions[$source_char_num]\n"; -# print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n"; -# } -# @source_chars = (); -# @source_char_descriptions = 
(); -# $target = ''; -# } -# } diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/mapping-ISOLatin1Accent.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/mapping-ISOLatin1Accent.txt deleted file mode 100644 index ede77425..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/mapping-ISOLatin1Accent.txt +++ /dev/null @@ -1,246 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Syntax: -# "source" => "target" -# "source".length() > 0 (source cannot be empty.) -# "target".length() >= 0 (target can be empty.) 
- -# example: -# "À" => "A" -# "\u00C0" => "A" -# "\u00C0" => "\u0041" -# "ß" => "ss" -# "\t" => " " -# "\n" => "" - -# À => A -"\u00C0" => "A" - -# Á => A -"\u00C1" => "A" - -#  => A -"\u00C2" => "A" - -# à => A -"\u00C3" => "A" - -# Ä => A -"\u00C4" => "A" - -# Å => A -"\u00C5" => "A" - -# Æ => AE -"\u00C6" => "AE" - -# Ç => C -"\u00C7" => "C" - -# È => E -"\u00C8" => "E" - -# É => E -"\u00C9" => "E" - -# Ê => E -"\u00CA" => "E" - -# Ë => E -"\u00CB" => "E" - -# Ì => I -"\u00CC" => "I" - -# Í => I -"\u00CD" => "I" - -# Î => I -"\u00CE" => "I" - -# Ï => I -"\u00CF" => "I" - -# IJ => IJ -"\u0132" => "IJ" - -# Ð => D -"\u00D0" => "D" - -# Ñ => N -"\u00D1" => "N" - -# Ò => O -"\u00D2" => "O" - -# Ó => O -"\u00D3" => "O" - -# Ô => O -"\u00D4" => "O" - -# Õ => O -"\u00D5" => "O" - -# Ö => O -"\u00D6" => "O" - -# Ø => O -"\u00D8" => "O" - -# Œ => OE -"\u0152" => "OE" - -# Þ -"\u00DE" => "TH" - -# Ù => U -"\u00D9" => "U" - -# Ú => U -"\u00DA" => "U" - -# Û => U -"\u00DB" => "U" - -# Ü => U -"\u00DC" => "U" - -# Ý => Y -"\u00DD" => "Y" - -# Ÿ => Y -"\u0178" => "Y" - -# à => a -"\u00E0" => "a" - -# á => a -"\u00E1" => "a" - -# â => a -"\u00E2" => "a" - -# ã => a -"\u00E3" => "a" - -# ä => a -"\u00E4" => "a" - -# å => a -"\u00E5" => "a" - -# æ => ae -"\u00E6" => "ae" - -# ç => c -"\u00E7" => "c" - -# è => e -"\u00E8" => "e" - -# é => e -"\u00E9" => "e" - -# ê => e -"\u00EA" => "e" - -# ë => e -"\u00EB" => "e" - -# ì => i -"\u00EC" => "i" - -# í => i -"\u00ED" => "i" - -# î => i -"\u00EE" => "i" - -# ï => i -"\u00EF" => "i" - -# ij => ij -"\u0133" => "ij" - -# ð => d -"\u00F0" => "d" - -# ñ => n -"\u00F1" => "n" - -# ò => o -"\u00F2" => "o" - -# ó => o -"\u00F3" => "o" - -# ô => o -"\u00F4" => "o" - -# õ => o -"\u00F5" => "o" - -# ö => o -"\u00F6" => "o" - -# ø => o -"\u00F8" => "o" - -# œ => oe -"\u0153" => "oe" - -# ß => ss -"\u00DF" => "ss" - -# þ => th -"\u00FE" => "th" - -# ù => u -"\u00F9" => "u" - -# ú => u -"\u00FA" => "u" - -# û => u -"\u00FB" => "u" - -# ü => u 
-"\u00FC" => "u" - -# ý => y -"\u00FD" => "y" - -# ÿ => y -"\u00FF" => "y" - -# ff => ff -"\uFB00" => "ff" - -# fi => fi -"\uFB01" => "fi" - -# fl => fl -"\uFB02" => "fl" - -# ffi => ffi -"\uFB03" => "ffi" - -# ffl => ffl -"\uFB04" => "ffl" - -# ſt => ft -"\uFB05" => "ft" - -# st => st -"\uFB06" => "st" diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/params.json b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/params.json deleted file mode 100644 index ac72676a..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/params.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "params": { - "_UPDATE_JSON_DOCS": { - "srcField": "_src_", - "mapUniqueKeyOnly": true, - "": { - "v": 0 - } - } - } -} \ No newline at end of file diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/protwords.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/protwords.txt deleted file mode 100644 index 1dfc0abe..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/protwords.txt +++ /dev/null @@ -1,21 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# Use a protected word file to protect against the stemmer reducing two -# unrelated words to the same base word. 
- -# Some non-words that normally won't be encountered, -# just to test that they won't be stemmed. -dontstems -zwhacky - diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/spellings.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/spellings.txt deleted file mode 100644 index d7ede6f5..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/spellings.txt +++ /dev/null @@ -1,2 +0,0 @@ -pizza -history \ No newline at end of file diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/stopwords.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/stopwords.txt deleted file mode 100644 index ae1e83ee..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/stopwords.txt +++ /dev/null @@ -1,14 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/synonyms.txt b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/synonyms.txt deleted file mode 100644 index eab4ee87..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/synonyms.txt +++ /dev/null @@ -1,29 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -#some test synonym mappings unlikely to appear in real input text -aaafoo => aaabar -bbbfoo => bbbfoo bbbbar -cccfoo => cccbar cccbaz -fooaaa,baraaa,bazaaa - -# Some synonym groups specific to this example -GB,gib,gigabyte,gigabytes -MB,mib,megabyte,megabytes -Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming -#after us won't split it into two words. - -# Synonym mappings can be used for spelling correction too -pixima => pixma - diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/update-script.js b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/update-script.js deleted file mode 100644 index 9eaf7b8d..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/update-script.js +++ /dev/null @@ -1,53 +0,0 @@ -/* - This is a basic skeleton JavaScript update processor. 
- - In order for this to be executed, it must be properly wired into solrconfig.xml; by default it is commented out in - the example solrconfig.xml and must be uncommented to be enabled. - - See https://solr.apache.org/guide/script-update-processor.html for more details. -*/ - -function processAdd(cmd) { - - doc = cmd.solrDoc; // org.apache.solr.common.SolrInputDocument - id = doc.getFieldValue("id"); - logger.warn("update-script#processAdd: id=" + id); // WARN level messages will show up in Solr Admin Logging UI - -// Set a field value: -// doc.setField("foo_s", "whatever"); - -// Get a configuration parameter: -// config_param = params.get('config_param'); // "params" only exists if processor configured with - -// Get a request parameter: -// some_param = req.getParams().get("some_param") - -// Add a field of field names that match a pattern: -// - Potentially useful to determine the fields/attributes represented in a result set, via faceting on field_name_ss -// field_names = doc.getFieldNames().toArray(); -// for(i=0; i < field_names.length; i++) { -// field_name = field_names[i]; -// if (/attr_.*/.test(field_name)) { doc.addField("attribute_ss", field_names[i]); } -// } - -} - -function processDelete(cmd) { - // no-op -} - -function processMergeIndexes(cmd) { - // no-op -} - -function processCommit(cmd) { - // no-op -} - -function processRollback(cmd) { - // no-op -} - -function finish() { - // no-op -} diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/example.xsl b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/example.xsl deleted file mode 100644 index b8992700..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/example.xsl +++ /dev/null @@ -1,132 +0,0 @@ - - - - - - - - - - - - - - - <xsl:value-of select="$title"/> - - - -

-
- This has been formatted by the sample "example.xsl" transform - - use your own XSLT to get a nicer page -
- - - -
- - - -
- - - - -
-
-
- - - - - - - - - - - - - - javascript:toggle("");? -
- - exp - - - - - -
- - -
- - - - - - - -
    - -
  • -
    -
- - -
- - - - - - - - - - - - - - - - - - - - -
diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/example_atom.xsl b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/example_atom.xsl deleted file mode 100644 index 8c36bac1..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/example_atom.xsl +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - Example Solr Atom 1.0 Feed - - This has been formatted by the sample "example_atom.xsl" transform - - use your own XSLT to get a nicer Atom feed. - - - Apache Solr - users@solr.apache.org - - - - - - tag:localhost,2007:example - - - - - - - - - <xsl:value-of select="str[@name='name']"/> - - tag:localhost,2007: - - - - - - diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/example_rss.xsl b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/example_rss.xsl deleted file mode 100644 index c8ab5bfb..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/example_rss.xsl +++ /dev/null @@ -1,66 +0,0 @@ - - - - - - - - - - - - - Example Solr RSS 2.0 Feed - http://localhost:8983/solr - - This has been formatted by the sample "example_rss.xsl" transform - - use your own XSLT to get a nicer RSS feed. 
- - en-us - http://localhost:8983/solr - - - - - - - - - - - <xsl:value-of select="str[@name='name']"/> - - http://localhost:8983/solr/select?q=id: - - - - - - - http://localhost:8983/solr/select?q=id: - - - - diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/luke.xsl b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/luke.xsl deleted file mode 100644 index 05fb5bfe..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/luke.xsl +++ /dev/null @@ -1,337 +0,0 @@ - - - - - - - - - Solr Luke Request Handler Response - - - - - - - - - <xsl:value-of select="$title"/> - - - - - -

- -

-
- -
- -

Index Statistics

- -
- -

Field Statistics

- - - -

Document statistics

- - - - -
- - - - - -
- -
- - -
- -
- -
-
-
- - - - - - - - - - - - - - - - - - - - - -
-

- -

- -
- -
-
-
- - -
- - 50 - 800 - 160 - blue - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- background-color: ; width: px; height: px; -
-
- -
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    - -
  • - -
  • -
    -
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/updateXml.xsl b/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/updateXml.xsl deleted file mode 100644 index 90c506d7..00000000 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/xslt/updateXml.xsl +++ /dev/null @@ -1,74 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_ca.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_ca.txt deleted file mode 100644 index 307a85f9..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_ca.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Set of Catalan contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -l -m -n -s -t diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_fr.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_fr.txt deleted file mode 100644 index f1bba51b..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_fr.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Set of French contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -l -m -t -qu -n -s -j -d -c -jusqu -quoiqu -lorsqu -puisqu diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_ga.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_ga.txt deleted file mode 100644 index 9ebe7fa3..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_ga.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Set of Irish contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -d -m -b diff --git 
a/search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_it.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_it.txt deleted file mode 100644 index cac04095..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/contractions_it.txt +++ /dev/null @@ -1,23 +0,0 @@ -# Set of Italian contractions for ElisionFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -c -l -all -dall -dell -nell -sull -coll -pell -gl -agl -dagl -degl -negl -sugl -un -m -t -s -v -d diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/hyphenations_ga.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/hyphenations_ga.txt deleted file mode 100644 index 4d2642cc..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/hyphenations_ga.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Set of Irish hyphenations for StopFilter -# TODO: load this as a resource from the analyzer and sync it in build.xml -h -n -t diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stemdict_nl.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stemdict_nl.txt deleted file mode 100644 index 44107297..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stemdict_nl.txt +++ /dev/null @@ -1,6 +0,0 @@ -# Set of overrides for the dutch stemmer -# TODO: load this as a resource from the analyzer and sync it in build.xml -fiets fiets -bromfiets bromfiets -ei eier -kind kinder diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stoptags_ja.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stoptags_ja.txt deleted file mode 100644 index 71b75084..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stoptags_ja.txt +++ /dev/null @@ -1,420 +0,0 @@ -# -# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. -# -# Any token with a part-of-speech tag that exactly matches those defined in this -# file are removed from the token stream. 
-# -# Set your own stoptags by uncommenting the lines below. Note that comments are -# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, -# etc. that can be useful for building you own stoptag set. -# -# The entire possible tagset is provided below for convenience. -# -##### -# noun: unclassified nouns -#名詞 -# -# noun-common: Common nouns or nouns where the sub-classification is undefined -#名詞-一般 -# -# noun-proper: Proper nouns where the sub-classification is undefined -#名詞-固有名詞 -# -# noun-proper-misc: miscellaneous proper nouns -#名詞-固有名詞-一般 -# -# noun-proper-person: Personal names where the sub-classification is undefined -#名詞-固有名詞-人名 -# -# noun-proper-person-misc: names that cannot be divided into surname and -# given name; foreign names; names where the surname or given name is unknown. -# e.g. お市の方 -#名詞-固有名詞-人名-一般 -# -# noun-proper-person-surname: Mainly Japanese surnames. -# e.g. 山田 -#名詞-固有名詞-人名-姓 -# -# noun-proper-person-given_name: Mainly Japanese given names. -# e.g. 太郎 -#名詞-固有名詞-人名-名 -# -# noun-proper-organization: Names representing organizations. -# e.g. 通産省, NHK -#名詞-固有名詞-組織 -# -# noun-proper-place: Place names where the sub-classification is undefined -#名詞-固有名詞-地域 -# -# noun-proper-place-misc: Place names excluding countries. -# e.g. アジア, バルセロナ, 京都 -#名詞-固有名詞-地域-一般 -# -# noun-proper-place-country: Country names. -# e.g. 日本, オーストラリア -#名詞-固有名詞-地域-国 -# -# noun-pronoun: Pronouns where the sub-classification is undefined -#名詞-代名詞 -# -# noun-pronoun-misc: miscellaneous pronouns: -# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ -#名詞-代名詞-一般 -# -# noun-pronoun-contraction: Spoken language contraction made by combining a -# pronoun and the particle 'wa'. -# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ -#名詞-代名詞-縮約 -# -# noun-adverbial: Temporal nouns such as names of days or months that behave -# like adverbs. Nouns that represent amount or ratios and can be used adverbially, -# e.g. 
金曜, 一月, 午後, 少量 -#名詞-副詞可能 -# -# noun-verbal: Nouns that take arguments with case and can appear followed by -# 'suru' and related verbs (する, できる, なさる, くださる) -# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り -#名詞-サ変接続 -# -# noun-adjective-base: The base form of adjectives, words that appear before な ("na") -# e.g. 健康, 安易, 駄目, だめ -#名詞-形容動詞語幹 -# -# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. -# e.g. 0, 1, 2, 何, 数, 幾 -#名詞-数 -# -# noun-affix: noun affixes where the sub-classification is undefined -#名詞-非自立 -# -# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that -# attach to the base form of inflectional words, words that cannot be classified -# into any of the other categories below. This category includes indefinite nouns. -# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, -# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, -# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, -# わり, 割り, 割, ん-口語/, もん-口語/ -#名詞-非自立-一般 -# -# noun-affix-adverbial: noun affixes that that can behave as adverbs. -# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, -# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, -# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, -# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, -# 儘, 侭, みぎり, 矢先 -#名詞-非自立-副詞可能 -# -# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars -# with the stem よう(だ) ("you(da)"). -# e.g. よう, やう, 様 (よう) -#名詞-非自立-助動詞語幹 -# -# noun-affix-adjective-base: noun affixes that can connect to the indeclinable -# connection form な (aux "da"). -# e.g. みたい, ふう -#名詞-非自立-形容動詞語幹 -# -# noun-special: special nouns where the sub-classification is undefined. -#名詞-特殊 -# -# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is -# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base -# form of inflectional words. -# e.g. 
そう -#名詞-特殊-助動詞語幹 -# -# noun-suffix: noun suffixes where the sub-classification is undefined. -#名詞-接尾 -# -# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect -# to ガル or タイ and can combine into compound nouns, words that cannot be classified into -# any of the other categories below. In general, this category is more inclusive than -# 接尾語 ("suffix") and is usually the last element in a compound noun. -# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, -# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 -#名詞-接尾-一般 -# -# noun-suffix-person: Suffixes that form nouns and attach to person names more often -# than other nouns. -# e.g. 君, 様, 著 -#名詞-接尾-人名 -# -# noun-suffix-place: Suffixes that form nouns and attach to place names more often -# than other nouns. -# e.g. 町, 市, 県 -#名詞-接尾-地域 -# -# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that -# can appear before スル ("suru"). -# e.g. 化, 視, 分け, 入り, 落ち, 買い -#名詞-接尾-サ変接続 -# -# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, -# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the -# conjunctive form of inflectional words. -# e.g. そう -#名詞-接尾-助動詞語幹 -# -# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive -# form of inflectional words and appear before the copula だ ("da"). -# e.g. 的, げ, がち -#名詞-接尾-形容動詞語幹 -# -# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. -# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) -#名詞-接尾-副詞可能 -# -# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category -# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach -# to numbers. -# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 -#名詞-接尾-助数詞 -# -# noun-suffix-special: Special suffixes that mainly attach to inflecting words. -# e.g. 
(楽し) さ, (考え) 方 -#名詞-接尾-特殊 -# -# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words -# together. -# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) -#名詞-接続詞的 -# -# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are -# semantically verb-like. -# e.g. ごらん, ご覧, 御覧, 頂戴 -#名詞-動詞非自立的 -# -# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, -# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") -# is いわく ("iwaku"). -#名詞-引用文字列 -# -# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and -# behave like an adjective. -# e.g. 申し訳, 仕方, とんでも, 違い -#名詞-ナイ形容詞語幹 -# -##### -# prefix: unclassified prefixes -#接頭詞 -# -# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) -# excluding numerical expressions. -# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) -#接頭詞-名詞接続 -# -# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb -# in conjunctive form followed by なる/なさる/くださる. -# e.g. お (読みなさい), お (座り) -#接頭詞-動詞接続 -# -# prefix-adjectival: Prefixes that attach to adjectives. -# e.g. お (寒いですねえ), バカ (でかい) -#接頭詞-形容詞接続 -# -# prefix-numerical: Prefixes that attach to numerical expressions. -# e.g. 約, およそ, 毎時 -#接頭詞-数接続 -# -##### -# verb: unclassified verbs -#動詞 -# -# verb-main: -#動詞-自立 -# -# verb-auxiliary: -#動詞-非自立 -# -# verb-suffix: -#動詞-接尾 -# -##### -# adjective: unclassified adjectives -#形容詞 -# -# adjective-main: -#形容詞-自立 -# -# adjective-auxiliary: -#形容詞-非自立 -# -# adjective-suffix: -#形容詞-接尾 -# -##### -# adverb: unclassified adverbs -#副詞 -# -# adverb-misc: Words that can be segmented into one unit and where adnominal -# modification is not possible. -# e.g. あいかわらず, 多分 -#副詞-一般 -# -# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, -# な, する, だ, etc. -# e.g. こんなに, そんなに, あんなに, なにか, なんでも -#副詞-助詞類接続 -# -##### -# adnominal: Words that only have noun-modifying forms. -# e.g. 
この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, -# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, -# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き -#連体詞 -# -##### -# conjunction: Conjunctions that can occur independently. -# e.g. が, けれども, そして, じゃあ, それどころか -接続詞 -# -##### -# particle: unclassified particles. -助詞 -# -# particle-case: case particles where the subclassification is undefined. -助詞-格助詞 -# -# particle-case-misc: Case particles. -# e.g. から, が, で, と, に, へ, より, を, の, にて -助詞-格助詞-一般 -# -# particle-case-quote: the "to" that appears after nouns, a person’s speech, -# quotation marks, expressions of decisions from a meeting, reasons, judgements, -# conjectures, etc. -# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) -助詞-格助詞-引用 -# -# particle-case-compound: Compounds of particles and verbs that mainly behave -# like case particles. -# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, -# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, -# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, -# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, -# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, -# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, -# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, -# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ -助詞-格助詞-連語 -# -# particle-conjunctive: -# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, -# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, -# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ -助詞-接続助詞 -# -# particle-dependency: -# e.g. こそ, さえ, しか, すら, は, も, ぞ -助詞-係助詞 -# -# particle-adverbial: -# e.g. 
がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, -# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, -# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, -# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, -# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) -助詞-副助詞 -# -# particle-interjective: particles with interjective grammatical roles. -# e.g. (松島) や -助詞-間投助詞 -# -# particle-coordinate: -# e.g. と, たり, だの, だり, とか, なり, や, やら -助詞-並立助詞 -# -# particle-final: -# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, -# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ -助詞-終助詞 -# -# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is -# adverbial, conjunctive, or sentence final. For example: -# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 -# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 -# 「(祈りが届いたせい) か (, 試験に合格した.)」 -# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 -# e.g. か -助詞-副助詞/並立助詞/終助詞 -# -# particle-adnominalizer: The "no" that attaches to nouns and modifies -# non-inflectional words. -助詞-連体化 -# -# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs -# that are giongo, giseigo, or gitaigo. -# e.g. に, と -助詞-副詞化 -# -# particle-special: A particle that does not fit into one of the above classifications. -# This includes particles that are used in Tanka, Haiku, and other poetry. -# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) -助詞-特殊 -# -##### -# auxiliary-verb: -助動詞 -# -##### -# interjection: Greetings and other exclamations. -# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, -# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい -#感動詞 -# -##### -# symbol: unclassified Symbols. -記号 -# -# symbol-misc: A general symbol not in one of the categories below. -# e.g. [○◎@$〒→+] -記号-一般 -# -# symbol-comma: Commas -# e.g. [,、] -記号-読点 -# -# symbol-period: Periods and full stops. -# e.g. 
[..。] -記号-句点 -# -# symbol-space: Full-width whitespace. -記号-空白 -# -# symbol-open_bracket: -# e.g. [({‘“『【] -記号-括弧開 -# -# symbol-close_bracket: -# e.g. [)}’”』」】] -記号-括弧閉 -# -# symbol-alphabetic: -#記号-アルファベット -# -##### -# other: unclassified other -#その他 -# -# other-interjection: Words that are hard to classify as noun-suffixes or -# sentence-final particles. -# e.g. (だ)ァ -その他-間投 -# -##### -# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. -# e.g. あの, うんと, えと -フィラー -# -##### -# non-verbal: non-verbal sound. -非言語音 -# -##### -# fragment: -#語断片 -# -##### -# unknown: unknown part of speech. -#未知語 -# -##### End of file diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ar.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ar.txt deleted file mode 100644 index 046829db..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ar.txt +++ /dev/null @@ -1,125 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -# Cleaned on October 11, 2009 (not normalized, so use before normalization) -# This means that when modifying this list, you might need to add some -# redundant entries, for example containing forms with both أ and ا -من -ومن -منها -منه -في -وفي -فيها -فيه -و -ف -ثم -او -أو -ب -بها -به -ا -أ -اى -اي -أي -أى -لا -ولا -الا -ألا -إلا -لكن -ما -وما -كما -فما -عن -مع -اذا -إذا -ان -أن -إن -انها -أنها -إنها -انه -أنه -إنه -بان -بأن -فان -فأن -وان -وأن -وإن -التى -التي -الذى -الذي -الذين -الى -الي -إلى -إلي -على -عليها -عليه -اما -أما -إما -ايضا -أيضا -كل -وكل -لم -ولم -لن -ولن -هى -هي -هو -وهى -وهي -وهو -فهى -فهي -فهو -انت -أنت -لك -لها -له -هذه -هذا -تلك -ذلك -هناك -كانت -كان -يكون -تكون -وكانت -وكان -غير -بعض -قد -نحو -بين -بينما -منذ -ضمن -حيث -الان -الآن -خلال -بعد -قبل -حتى -عند -عندما -لدى -جميع diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_bg.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_bg.txt deleted file mode 100644 index 1ae4ba2a..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_bg.txt +++ /dev/null @@ -1,193 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -а -аз -ако -ала -бе -без -беше -би -бил -била -били -било -близо -бъдат -бъде -бяха -в -вас -ваш -ваша -вероятно -вече -взема -ви -вие -винаги -все -всеки -всички -всичко -всяка -във -въпреки -върху -г -ги -главно -го -д -да -дали -до -докато -докога -дори -досега -доста -е -едва -един -ето -за -зад -заедно -заради -засега -затова -защо -защото -и -из -или -им -има -имат -иска -й -каза -как -каква -какво -както -какъв -като -кога -когато -което -които -кой -който -колко -която -къде -където -към -ли -м -ме -между -мен -ми -мнозина -мога -могат -може -моля -момента -му -н -на -над -назад -най -направи -напред -например -нас -не -него -нея -ни -ние -никой -нито -но -някои -някой -няма -обаче -около -освен -особено -от -отгоре -отново -още -пак -по -повече -повечето -под -поне -поради -после -почти -прави -пред -преди -през -при -пък -първо -с -са -само -се -сега -си -скоро -след -сме -според -сред -срещу -сте -съм -със -също -т -тази -така -такива -такъв -там -твой -те -тези -ти -тн -то -това -тогава -този -той -толкова -точно -трябва -тук -тъй -тя -тях -у -харесва -ч -че -често -чрез -ще -щом -я diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ca.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ca.txt deleted file mode 100644 index 3da65dea..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ca.txt +++ /dev/null @@ -1,220 +0,0 @@ -# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) -a -abans -ací -ah -així -això -al -als -aleshores -algun -alguna -algunes -alguns -alhora -allà -allí -allò -altra -altre -altres -amb -ambdós -ambdues -apa -aquell -aquella -aquelles -aquells -aquest -aquesta -aquestes -aquests -aquí -baix -cada -cadascú -cadascuna -cadascunes -cadascuns -com -contra -d'un -d'una -d'unes -d'uns -dalt -de -del -dels -des -després -dins -dintre -donat -doncs -durant -e -eh -el -els -em 
-en -encara -ens -entre -érem -eren -éreu -es -és -esta -està -estàvem -estaven -estàveu -esteu -et -etc -ets -fins -fora -gairebé -ha -han -has -havia -he -hem -heu -hi -ho -i -igual -iguals -ja -l'hi -la -les -li -li'n -llavors -m'he -ma -mal -malgrat -mateix -mateixa -mateixes -mateixos -me -mentre -més -meu -meus -meva -meves -molt -molta -moltes -molts -mon -mons -n'he -n'hi -ne -ni -no -nogensmenys -només -nosaltres -nostra -nostre -nostres -o -oh -oi -on -pas -pel -pels -per -però -perquè -poc -poca -pocs -poques -potser -propi -qual -quals -quan -quant -que -què -quelcom -qui -quin -quina -quines -quins -s'ha -s'han -sa -semblant -semblants -ses -seu -seus -seva -seva -seves -si -sobre -sobretot -sóc -solament -sols -son -són -sons -sota -sou -t'ha -t'han -t'he -ta -tal -també -tampoc -tan -tant -tanta -tantes -teu -teus -teva -teves -ton -tons -tot -tota -totes -tots -un -una -unes -uns -us -va -vaig -vam -van -vas -veu -vosaltres -vostra -vostre -vostres diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_cz.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_cz.txt deleted file mode 100644 index 53c6097d..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_cz.txt +++ /dev/null @@ -1,172 +0,0 @@ -a -s -k -o -i -u -v -z -dnes -cz -tímto -budeš -budem -byli -jseš -můj -svým -ta -tomto -tohle -tuto -tyto -jej -zda -proč -máte -tato -kam -tohoto -kdo -kteří -mi -nám -tom -tomuto -mít -nic -proto -kterou -byla -toho -protože -asi -ho -naši -napište -re -což -tím -takže -svých -její -svými -jste -aj -tu -tedy -teto -bylo -kde -ke -pravé -ji -nad -nejsou -či -pod -téma -mezi -přes -ty -pak -vám -ani -když -však -neg -jsem -tento -článku -články -aby -jsme -před -pta -jejich -byl -ještě -až -bez -také -pouze -první -vaše -která -nás -nový -tipy -pokud -může -strana -jeho -své -jiné -zprávy -nové -není -vás -jen -podle -zde -už -být -více -bude -již -než -který -by -které -co -nebo -ten -tak -má 
-při -od -po -jsou -jak -další -ale -si -se -ve -to -jako -za -zpět -ze -do -pro -je -na -atd -atp -jakmile -přičemž -já -on -ona -ono -oni -ony -my -vy -jí -ji -mě -mne -jemu -tomu -těm -těmu -němu -němuž -jehož -jíž -jelikož -jež -jakož -načež diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_da.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_da.txt deleted file mode 100644 index 42e6145b..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_da.txt +++ /dev/null @@ -1,110 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Danish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - -og | and -i | in -jeg | I -det | that (dem. pronoun)/it (pers. pronoun) -at | that (in front of a sentence)/to (with infinitive) -en | a/an -den | it (pers. pronoun)/that (dem. 
pronoun) -til | to/at/for/until/against/by/of/into, more -er | present tense of "to be" -som | who, as -på | on/upon/in/on/at/to/after/of/with/for, on -de | they -med | with/by/in, along -han | he -af | of/by/from/off/for/in/with/on, off -for | at/for/to/from/by/of/ago, in front/before, because -ikke | not -der | who/which, there/those -var | past tense of "to be" -mig | me/myself -sig | oneself/himself/herself/itself/themselves -men | but -et | a/an/one, one (number), someone/somebody/one -har | present tense of "to have" -om | round/about/for/in/a, about/around/down, if -vi | we -min | my -havde | past tense of "to have" -ham | him -hun | she -nu | now -over | over/above/across/by/beyond/past/on/about, over/past -da | then, when/as/since -fra | from/off/since, off, since -du | you -ud | out -sin | his/her/its/one's -dem | them -os | us/ourselves -op | up -man | you/one -hans | his -hvor | where -eller | or -hvad | what -skal | must/shall etc. -selv | myself/youself/herself/ourselves etc., even -her | here -alle | all/everyone/everybody etc. 
-vil | will (verb) -blev | past tense of "to stay/to remain/to get/to become" -kunne | could -ind | in -når | when -være | present tense of "to be" -dog | however/yet/after all -noget | something -ville | would -jo | you know/you see (adv), yes -deres | their/theirs -efter | after/behind/according to/for/by/from, later/afterwards -ned | down -skulle | should -denne | this -end | than -dette | this -mit | my/mine -også | also -under | under/beneath/below/during, below/underneath -have | have -dig | you -anden | other -hende | her -mine | my -alt | everything -meget | much/very, plenty of -sit | his, her, its, one's -sine | his, her, its, one's -vor | our -mod | against -disse | these -hvis | if -din | your/yours -nogle | some -hos | by/at -blive | be/become -mange | many -ad | by/through -bliver | present tense of "to be/to become" -hendes | her/hers -været | be -thi | for (conj) -jer | you -sådan | such, like this/like that diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_de.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_de.txt deleted file mode 100644 index 86525e7a..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_de.txt +++ /dev/null @@ -1,294 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A German stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | The number of forms in this list is reduced significantly by passing it - | through the German stemmer. 
- - -aber | but - -alle | all -allem -allen -aller -alles - -als | than, as -also | so -am | an + dem -an | at - -ander | other -andere -anderem -anderen -anderer -anderes -anderm -andern -anderr -anders - -auch | also -auf | on -aus | out of -bei | by -bin | am -bis | until -bist | art -da | there -damit | with it -dann | then - -der | the -den -des -dem -die -das - -daß | that - -derselbe | the same -derselben -denselben -desselben -demselben -dieselbe -dieselben -dasselbe - -dazu | to that - -dein | thy -deine -deinem -deinen -deiner -deines - -denn | because - -derer | of those -dessen | of him - -dich | thee -dir | to thee -du | thou - -dies | this -diese -diesem -diesen -dieser -dieses - - -doch | (several meanings) -dort | (over) there - - -durch | through - -ein | a -eine -einem -einen -einer -eines - -einig | some -einige -einigem -einigen -einiger -einiges - -einmal | once - -er | he -ihn | him -ihm | to him - -es | it -etwas | something - -euer | your -eure -eurem -euren -eurer -eures - -für | for -gegen | towards -gewesen | p.p. 
of sein -hab | have -habe | have -haben | have -hat | has -hatte | had -hatten | had -hier | here -hin | there -hinter | behind - -ich | I -mich | me -mir | to me - - -ihr | you, to her -ihre -ihrem -ihren -ihrer -ihres -euch | to you - -im | in + dem -in | in -indem | while -ins | in + das -ist | is - -jede | each, every -jedem -jeden -jeder -jedes - -jene | that -jenem -jenen -jener -jenes - -jetzt | now -kann | can - -kein | no -keine -keinem -keinen -keiner -keines - -können | can -könnte | could -machen | do -man | one - -manche | some, many a -manchem -manchen -mancher -manches - -mein | my -meine -meinem -meinen -meiner -meines - -mit | with -muss | must -musste | had to -nach | to(wards) -nicht | not -nichts | nothing -noch | still, yet -nun | now -nur | only -ob | whether -oder | or -ohne | without -sehr | very - -sein | his -seine -seinem -seinen -seiner -seines - -selbst | self -sich | herself - -sie | they, she -ihnen | to them - -sind | are -so | so - -solche | such -solchem -solchen -solcher -solches - -soll | shall -sollte | should -sondern | but -sonst | else -über | over -um | about, around -und | and - -uns | us -unse -unsem -unsen -unser -unses - -unter | under -viel | much -vom | von + dem -von | from -vor | before -während | while -war | was -waren | were -warst | wast -was | what -weg | away, off -weil | because -weiter | further - -welche | which -welchem -welchen -welcher -welches - -wenn | when -werde | will -werden | will -wie | how -wieder | again -will | want -wir | we -wird | will -wirst | willst -wo | where -wollen | want -wollte | wanted -würde | would -würden | would -zu | to -zum | zu + dem -zur | zu + der -zwar | indeed -zwischen | between - diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_el.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_el.txt deleted file mode 100644 index 232681f5..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_el.txt +++ 
/dev/null @@ -1,78 +0,0 @@ -# Lucene Greek Stopwords list -# Note: by default this file is used after GreekLowerCaseFilter, -# so when modifying this file use 'σ' instead of 'ς' -ο -η -το -οι -τα -του -τησ -των -τον -την -και -κι -κ -ειμαι -εισαι -ειναι -ειμαστε -ειστε -στο -στον -στη -στην -μα -αλλα -απο -για -προσ -με -σε -ωσ -παρα -αντι -κατα -μετα -θα -να -δε -δεν -μη -μην -επι -ενω -εαν -αν -τοτε -που -πωσ -ποιοσ -ποια -ποιο -ποιοι -ποιεσ -ποιων -ποιουσ -αυτοσ -αυτη -αυτο -αυτοι -αυτων -αυτουσ -αυτεσ -αυτα -εκεινοσ -εκεινη -εκεινο -εκεινοι -εκεινεσ -εκεινα -εκεινων -εκεινουσ -οπωσ -ομωσ -ισωσ -οσο -οτι diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_en.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_en.txt deleted file mode 100644 index 2c164c0b..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_en.txt +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -# Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -such -that -the -their -then -there -these -they -this -to -was -will -with diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_es.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_es.txt deleted file mode 100644 index 487d78c8..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_es.txt +++ /dev/null @@ -1,356 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Spanish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. 
- -de | from, of -la | the, her -que | who, that -el | the -en | in -y | and -a | to -los | the, them -del | de + el -se | himself, from him etc -las | the, them -por | for, by, etc -un | a -para | for -con | with -no | no -una | a -su | his, her -al | a + el - | es from SER -lo | him -como | how -más | more -pero | pero -sus | su plural -le | to him, her -ya | already -o | or - | fue from SER -este | this - | ha from HABER -sí | himself etc -porque | because -esta | this - | son from SER -entre | between - | está from ESTAR -cuando | when -muy | very -sin | without -sobre | on - | ser from SER - | tiene from TENER -también | also -me | me -hasta | until -hay | there is/are -donde | where - | han from HABER -quien | whom, that - | están from ESTAR - | estado from ESTAR -desde | from -todo | all -nos | us -durante | during - | estados from ESTAR -todos | all -uno | a -les | to them -ni | nor -contra | against -otros | other - | fueron from SER -ese | that -eso | that - | había from HABER -ante | before -ellos | they -e | and (variant of y) -esto | this -mí | me -antes | before -algunos | some -qué | what? 
-unos | a -yo | I -otro | other -otras | other -otra | other -él | he -tanto | so much, many -esa | that -estos | these -mucho | much, many -quienes | who -nada | nothing -muchos | many -cual | who - | sea from SER -poco | few -ella | she -estar | to be - | haber from HABER -estas | these - | estaba from ESTAR - | estamos from ESTAR -algunas | some -algo | something -nosotros | we - - | other forms - -mi | me -mis | mi plural -tú | thou -te | thee -ti | thee -tu | thy -tus | tu plural -ellas | they -nosotras | we -vosotros | you -vosotras | you -os | you -mío | mine -mía | -míos | -mías | -tuyo | thine -tuya | -tuyos | -tuyas | -suyo | his, hers, theirs -suya | -suyos | -suyas | -nuestro | ours -nuestra | -nuestros | -nuestras | -vuestro | yours -vuestra | -vuestros | -vuestras | -esos | those -esas | those - - | forms of estar, to be (not including the infinitive): -estoy -estás -está -estamos -estáis -están -esté -estés -estemos -estéis -estén -estaré -estarás -estará -estaremos -estaréis -estarán -estaría -estarías -estaríamos -estaríais -estarían -estaba -estabas -estábamos -estabais -estaban -estuve -estuviste -estuvo -estuvimos -estuvisteis -estuvieron -estuviera -estuvieras -estuviéramos -estuvierais -estuvieran -estuviese -estuvieses -estuviésemos -estuvieseis -estuviesen -estando -estado -estada -estados -estadas -estad - - | forms of haber, to have (not including the infinitive): -he -has -ha -hemos -habéis -han -haya -hayas -hayamos -hayáis -hayan -habré -habrás -habrá -habremos -habréis -habrán -habría -habrías -habríamos -habríais -habrían -había -habías -habíamos -habíais -habían -hube -hubiste -hubo -hubimos -hubisteis -hubieron -hubiera -hubieras -hubiéramos -hubierais -hubieran -hubiese -hubieses -hubiésemos -hubieseis -hubiesen -habiendo -habido -habida -habidos -habidas - - | forms of ser, to be (not including the infinitive): -soy -eres -es -somos -sois -son -sea -seas -seamos -seáis -sean -seré -serás -será -seremos -seréis -serán -sería 
-serías -seríamos -seríais -serían -era -eras -éramos -erais -eran -fui -fuiste -fue -fuimos -fuisteis -fueron -fuera -fueras -fuéramos -fuerais -fueran -fuese -fueses -fuésemos -fueseis -fuesen -siendo -sido - | sed also means 'thirst' - - | forms of tener, to have (not including the infinitive): -tengo -tienes -tiene -tenemos -tenéis -tienen -tenga -tengas -tengamos -tengáis -tengan -tendré -tendrás -tendrá -tendremos -tendréis -tendrán -tendría -tendrías -tendríamos -tendríais -tendrían -tenía -tenías -teníamos -teníais -tenían -tuve -tuviste -tuvo -tuvimos -tuvisteis -tuvieron -tuviera -tuvieras -tuviéramos -tuvierais -tuvieran -tuviese -tuvieses -tuviésemos -tuvieseis -tuviesen -teniendo -tenido -tenida -tenidos -tenidas -tened - diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_et.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_et.txt deleted file mode 100644 index 1b06a134..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_et.txt +++ /dev/null @@ -1,1603 +0,0 @@ -# Estonian stopwords list -all -alla -allapoole -allpool -alt -altpoolt -eel -eespool -enne -hommikupoole -hoolimata -ilma -kaudu -keset -kesk -kohe -koos -kuhupoole -kuni -kuspool -kustpoolt -kõige -käsikäes -lappi -ligi -läbi -mööda -paitsi -peale -pealepoole -pealpool -pealt -pealtpoolt -piki -pikku -piku -pikuti -põiki -pärast -päri -risti -sealpool -sealtpoolt -seespool -seltsis -siiapoole -siinpool -siitpoolt -sinnapoole -sissepoole -taga -tagantpoolt -tagapidi -tagapool -taha -tahapoole -teispool -teispoole -tänu -tükkis -vaatamata -vastu -väljapoole -väljaspool -väljastpoolt -õhtupoole -ühes -ühestükis -ühestükkis -ülalpool -ülaltpoolt -üle -ülespoole -ülevalpool -ülevaltpoolt -ümber -ümbert -aegu -aegus -alguks -algul -algule -algult -alguni -all -alla -alt -alul -alutsi -arvel -asemel -asemele -eel -eeli -ees -eesotsas -eest -eestotsast -esitsi -ette -etteotsa -haaval -heaks -hoolimata -hulgas -hulgast -hulka 
-jalgu -jalus -jalust -jaoks -jooksul -juurde -juures -juurest -jälil -jälile -järel -järele -järelt -järgi -kaasas -kallal -kallale -kallalt -kamul -kannul -kannule -kannult -kaudu -kaupa -keskel -keskele -keskelt -keskis -keskpaiku -kestel -kestes -kilda -killas -killast -kimpu -kimpus -kiuste -kohal -kohale -kohalt -kohaselt -kohe -kohta -koos -korral -kukil -kukile -kukilt -kulul -kõrva -kõrval -kõrvale -kõrvalt -kõrvas -kõrvast -käekõrval -käekõrvale -käekõrvalt -käes -käest -kätte -külge -küljes -küljest -küüsi -küüsis -küüsist -ligi -ligidal -ligidale -ligidalt -aegu -aegus -alguks -algul -algule -algult -alguni -all -alla -alt -alul -alutsi -arvel -asemel -asemele -eel -eeli -ees -eesotsas -eest -eestotsast -esitsi -ette -etteotsa -haaval -heaks -hoolimata -hulgas -hulgast -hulka -jalgu -jalus -jalust -jaoks -jooksul -juurde -juures -juurest -jälil -jälile -järel -järele -järelt -järgi -kaasas -kallal -kallale -kallalt -kamul -kannul -kannule -kannult -kaudu -kaupa -keskel -keskele -keskelt -keskis -keskpaiku -kestel -kestes -kilda -killas -killast -kimpu -kimpus -kiuste -kohal -kohale -kohalt -kohaselt -kohe -kohta -koos -korral -kukil -kukile -kukilt -kulul -kõrva -kõrval -kõrvale -kõrvalt -kõrvas -kõrvast -käekõrval -käekõrvale -käekõrvalt -käes -käest -kätte -külge -küljes -küljest -küüsi -küüsis -küüsist -ligi -ligidal -ligidale -ligidalt -lool -läbi -lähedal -lähedale -lähedalt -man -mant -manu -meelest -mööda -nahas -nahka -nahkas -najal -najale -najalt -nõjal -nõjale -otsa -otsas -otsast -paigale -paigu -paiku -peal -peale -pealt -perra -perrä -pidi -pihta -piki -pikku -pool -poole -poolest -poolt -puhul -puksiiris -pähe -päralt -päras -pärast -päri -ringi -ringis -risust -saadetusel -saadik -saatel -saati -seas -seast -sees -seest -sekka -seljataga -seltsi -seltsis -seltsist -sisse -slepis -suhtes -šlepis -taga -tagant -tagantotsast -tagaotsas -tagaselja -tagasi -tagast -tagutsi -taha -tahaotsa -takka -tarvis -tasa -tuuri -tuuris -tõttu -tükkis 
-uhal -vaatamata -vahel -vahele -vahelt -vahepeal -vahepeale -vahepealt -vahetsi -varal -varale -varul -vastas -vastast -vastu -veerde -veeres -viisi -võidu -võrd -võrdki -võrra -võrragi -väel -väele -vältel -väärt -väärtki -äärde -ääre -ääres -äärest -ühes -üle -ümber -ümbert -a -abil -aina -ainult -alalt -alates -alati -alles -b -c -d -e -eales -ealeski -edasi -edaspidi -eelkõige -eemal -ei -eks -end -enda -enese -ennem -esialgu -f -g -h -hoopis -i -iganes -igatahes -igati -iial -iialgi -ikka -ikkagi -ilmaski -iseenda -iseenese -iseenesest -isegi -j -jah -ju -juba -juhul -just -järelikult -k -ka -kah -kas -kasvõi -keda -kestahes -kogu -koguni -kohati -kokku -kuhu -kuhugi -kuidagi -kuidas -kunagi -kus -kusagil -kusjuures -kuskil -kust -kõigepealt -küll -l -liiga -lisaks -m -miks -mil -millal -millalgi -mispärast -mistahes -mistõttu -mitte -muide -muidu -muidugi -muist -mujal -mujale -mujalt -mõlemad -mõnda -mõne -mõnikord -n -nii -niikaua -niimoodi -niipaljuke -niisama -niisiis -niivõrd -nõnda -nüüd -o -omaette -omakorda -omavahel -ometi -p -palju -paljuke -palju-palju -peaaegu -peagi -peamiselt -pigem -pisut -praegu -päris -r -rohkem -s -samas -samuti -seal -sealt -sedakorda -sedapuhku -seega -seejuures -seejärel -seekord -seepärast -seetõttu -sellepärast -seni -sestap -siia -siiani -siin -siinkohal -siis -siiski -siit -sinna -suht -š -z -ž -t -teel -teineteise -tõesti -täiesti -u -umbes -v -w -veel -veelgi -vist -võibolla -võib-olla -väga -vähemalt -välja -väljas -väljast -õ -ä -ära -ö -ü -ühtlasi -üksi -ükskõik -ülal -ülale -ülalt -üles -ülesse -üleval -ülevalt -ülimalt -üsna -x -y -aga -ega -ehk -ehkki -elik -ellik -enge -ennegu -ent -et -ja -justkui -kui -kuid -kuigi -kuivõrd -kuna -kuni -kut -mistab -muudkui -nagu -nigu -ning -olgugi -otsekui -otsenagu -selmet -sest -sestab -vaid -või -aa -adaa -adjöö -ae -ah -ahaa -ahah -ah-ah-ah -ah-haa -ahoi -ai -aidaa -aidu-raidu -aih -aijeh -aituma -aitäh -aitüma -ammuu -amps -ampsti -aptsih -ass -at -ata -at-at-at 
-atsih -atsihh -auh -bai-bai -bingo -braavo -brr -ee -eeh -eh -ehee -eheh -eh-eh-hee -eh-eh-ee -ehei -ehh -ehhee -einoh -ena -ennäe -ennäh -fuh -fui -fuih -haa -hah -hahaa -hah-hah-hah -halleluuja -hallo -halloo -hass -hee -heh -he-he-hee -hei -heldeke(ne) -heureka -hihii -hip-hip-hurraa -hmh -hmjah -hoh-hoh-hoo -hohoo -hoi -hollallaa -hoo -hoplaa -hopp -hops -hopsassaa -hopsti -hosianna -huh -huidii -huist -hurjah -hurjeh -hurjoh -hurjuh -hurraa -huu -hõhõh -hõi -hõissa -hõissassa -hõk -hõkk -häh -hä-hä-hää -hüvasti -ih-ah-haa -ih-ih-hii -ii-ha-ha -issake -issakene -isver -jaa-ah -ja-ah -jaah -janäe -jeeh -jeerum -jeever -jessas -jestas -juhhei -jumalaga -jumalime -jumaluke -jumalukene -jutas -kaaps -kaapsti -kaasike -kae -kalps -kalpsti -kannäe -kanäe -kappadi -kaps -kapsti -karkõmm -karkäuh -karkääks -karkääksti -karmauh -karmauhti -karnaps -karnapsti -karniuhti -karpartsaki -karpauh -karpauhti -karplauh -karplauhti -karprauh -karprauhti -karsumdi -karsumm -kartsumdi -kartsumm -karviuh -karviuhti -kaske -kassa -kauh -kauhti -keh -keksti -kepsti -khe -khm -kih -kiiks -kiiksti -kiis -kiiss -kikerii -kikerikii -kili -kilk -kilk-kõlk -kilks -kilks-kolks -kilks-kõlks -kill -killadi -killadi|-kolladi -killadi-kõlladi -killa-kolla -killa-kõlla -kill-kõll -kimps-komps -kipp -kips-kõps -kiriküüt -kirra-kõrra -kirr-kõrr -kirts -klaps -klapsti -klirdi -klirr -klonks -klops -klopsti -kluk -klu-kluu -klõks -klõksti -klõmdi -klõmm -klõmpsti -klõnks -klõnksti -klõps -klõpsti -kläu -kohva-kohva -kok -koks -koksti -kolaki -kolk -kolks -kolksti -koll -kolladi -komp -komps -kompsti -kop -kopp -koppadi -kops -kopsti -kossu -kotsu -kraa -kraak -kraaks -kraaps -kraapsti -krahh -kraks -kraksti -kraps -krapsti -krauh -krauhti -kriiks -kriiksti -kriips -kriips-kraaps -kripa-krõpa -krips-kraps -kriuh -kriuks -kriuksti -kromps -kronk -kronks -krooks -kruu -krõks -krõksti -krõpa -krõps -krõpsti -krõuh -kräu -kräuh -kräuhti -kräuks -kss -kukeleegu -kukku -kuku -kulu -kurluu -kurnäu -kuss 
-kussu -kõks -kõksti -kõldi -kõlks -kõlksti -kõll -kõmaki -kõmdi -kõmm -kõmps -kõpp -kõps -kõpsadi -kõpsat -kõpsti -kõrr -kõrra-kõrra -kõss -kõtt -kõõksti -kärr -kärts -kärtsti -käuks -käuksti -kääga -kääks -kääksti -köh -köki-möki -köksti -laks -laksti -lampsti -larts -lartsti -lats -latsti -leelo -legoo -lehva -liiri-lõõri -lika-lõka -likat-lõkat -limpsti -lips -lipsti -lirts -lirtsaki -lirtsti -lonksti -lops -lopsti -lorts -lortsti -luks -lups -lupsti -lurts -lurtsti -lõks -lõksti -lõmps -lõmpsti -lõnks -lõnksti -lärts -lärtsti -läts -lätsti -lörts -lörtsti -lötsti -lööps -lööpsti -marss -mats -matsti -mauh -mauhti -mh -mhh -mhmh -miau -mjaa -mkm -m-mh -mnjaa -mnjah -moens -mulks -mulksti -mull-mull -mull-mull-mull -muu -muuh -mõh -mõmm -mäh -mäts -mäu -mää -möh -möh-öh-ää -möö -müh-müh -mühüh -müks -müksti -müraki -mürr -mürts -mürtsaki -mürtsti -mütaku -müta-mäta -müta-müta -müt-müt -müt-müt-müt -müts -mütsti -mütt -naa -naah -nah -naks -naksti -nanuu -naps -napsti -nilpsti -nipsti -nirr -niuh -niuh-näuh -niuhti -noh -noksti -nolpsti -nonoh -nonoo -nonäh -noo -nooh -nooks -norr -nurr -nuuts -nõh -nõhh -nõka-nõka -nõks -nõksat-nõksat -nõks-nõks -nõksti -nõõ -nõõh -näeh -näh -nälpsti -nämm-nämm -näpsti -näts -nätsti -näu -näuh -näuhti -näuks -näuksti -nääh -nääks -nühkat-nühkat -oeh -oh -ohh -ohhh -oh-hoi -oh-hoo -ohoh -oh-oh-oo -oh-oh-hoo -ohoi -ohoo -oi -oih -oijee -oijeh -oo -ooh -oo-oh -oo-ohh -oot -ossa -ot -paa -pah -pahh -pakaa -pamm -pantsti -pardon -pardonks -parlartsti -parts -partsti -partsumdi -partsumm -pastoi -pats -patst -patsti -pau -pauh -pauhti -pele -pfui -phuh -phuuh -phäh -phähh -piiks -piip -piiri-pääri -pimm -pimm-pamm -pimm-pomm -pimm-põmm -piraki -piuks -piu-pau -plaks -plaksti -plarts -plartsti -plats -platsti -plauh -plauhh -plauhti -pliks -pliks-plaks -plinn -pliraki -plirts -plirtsti -pliu -pliuh -ploks -plotsti -plumps -plumpsti -plõks -plõksti -plõmdi -plõmm -plõnn -plärr -plärts -plärtsat -plärtsti -pläu -pläuh -plää -plörtsat 
-pomm -popp -pops -popsti -ports -pot -pots -potsti -pott -praks -praksti -prants -prantsaki -prantsti -prassai -prauh -prauhh -prauhti -priks -priuh -priuhh -priuh-prauh -proosit -proost -prr -prrr -prõks -prõksti -prõmdi -prõmm -prõntsti -prääk -prääks -pst -psst -ptrr -ptruu -ptüi -puh -puhh -puksti -pumm -pumps -pup-pup-pup -purts -puuh -põks -põksti -põmdi -põmm -põmmadi -põnks -põnn -põnnadi -põnt -põnts -põntsti -põraki -põrr -põrra-põrra -päh -pähh -päntsti -pää -pöörd -püh -raks -raksti -raps -rapsti -ratataa -rauh -riips -riipsti -riks -riks-raks -rips-raps -rivitult -robaki -rops -ropsaki -ropsti -ruik -räntsti -räts -röh -röhh -sah -sahh -sahkat -saps -sapsti -sauh -sauhti -servus -sihkadi-sahkadi -sihka-sahka -sihkat-sahkat -silks -silk-solk -sips -sipsti -sirr -sirr-sorr -sirts -sirtsti -siu -siuh -siuh-sauh -siuh-säuh -siuhti -siuks -siuts -skool -so -soh -solks -solksti -solpsti -soo -sooh -so-oh -soo-oh -sopp -sops -sopsti -sorr -sorts -sortsti -so-soo -soss -soss-soss -ss -sss -sst -stopp -suhkat-sahkat -sulk -sulks -sulksti -sull -sulla-sulla -sulpa-sulpa -sulps -sulpsti -sumaki -sumdi -summ -summat-summat -sups -supsaku -supsti -surts -surtsti -suss -susti -suts -sutsti -säh -sähke -särts -särtsti -säu -säuh -säuhti -taevake -taevakene -takk -tere -terekest -tibi-tibi -tikk-takk -tiks -tilk -tilks -till -tilla-talla -till-tall -tilulii -tinn -tip -tip-tap -tirr -tirtsti -tiu -tjaa -tjah -tohhoh -tohhoo -tohoh -tohoo -tok -tokk -toks -toksti -tonks -tonksti -tota -totsti -tot-tot -tprr -tpruu -trah -trahh -trallallaa -trill -trillallaa -trr -trrr -tsah -tsahh -tsilk -tsilk-tsolk -tsirr -tsiuh -tskae -tsolk -tss -tst -tsst -tsuhh -tsuk -tsumm -tsurr -tsäuh -tšao -tšš -tššš -tuk -tuks -turts -turtsti -tutki -tutkit -tutu-lutu -tutulutu -tuut -tuutu-luutu -tõks -tötsti -tümps -uh -uhh -uh-huu -uhtsa -uhtsaa -uhuh -uhuu -ui -uih -uih-aih -uijah -uijeh -uist -uit -uka -upsti -uraa -urjah -urjeh -urjoh -urjuh -urr -urraa -ust -utu -uu -uuh -vaak -vaat 
-vae -vaeh -vai -vat -vau -vhüüt -vidiit -viiks -vilks -vilksti -vinki-vinki -virdi -virr -viu -viudi -viuh -viuhti -voeh -voh -vohh -volks -volksti -vooh -vops -vopsti -vot -vuh -vuhti -vuih -vulks -vulksti -vull -vulpsti -vups -vupsaki -vupsaku -vupsti -vurdi -vurr -vurra-vurra -vurts -vurtsti -vutt -võe -võeh -või -võih -võrr -võts -võtt -vääks -õe -õits -õk -õkk -õrr -õss -õuh -äh -ähh -ähhähhää -äh-hää -äh-äh-hää -äiu -äiu-ää -äss -ää -ääh -äähh -öh -öhh -ök -üh -eelmine -eikeegi -eimiski -emb-kumb -enam -enim -iga -igasugune -igaüks -ise -isesugune -järgmine -keegi -kes -kumb -kumbki -kõik -meiesugune -meietaoline -midagi -mihuke -mihukene -milletaoline -milline -mina -minake -mingi -mingisugune -minusugune -minutaoline -mis -miski -miskisugune -missugune -misuke -mitmes -mitmesugune -mitu -mitu-mitu -mitu-setu -muu -mõlema -mõnesugune -mõni -mõningane -mõningas -mäherdune -määrane -naasugune -need -nemad -nendesugune -nendetaoline -nihuke -nihukene -niimitu -niisamasugune -niisugune -nisuke -nisukene -oma -omaenese -omasugune -omataoline -pool -praegune -sama -samasugune -samataoline -see -seesama -seesamane -seesamune -seesinane -seesugune -selline -sihuke -sihukene -sina -sinusugune -sinutaoline -siuke -siukene -säherdune -säärane -taoline -teiesugune -teine -teistsugune -tema -temake -temakene -temasugune -temataoline -too -toosama -toosamane -üks -üksteise -hakkama -minema -olema -pidama -saama -tegema -tulema -võima diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_eu.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_eu.txt deleted file mode 100644 index 25f1db93..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_eu.txt +++ /dev/null @@ -1,99 +0,0 @@ -# example set of basque stopwords -al -anitz -arabera -asko -baina -bat -batean -batek -bati -batzuei -batzuek -batzuetan -batzuk -bera -beraiek -berau -berauek -bere -berori -beroriek -beste -bezala -da -dago -dira -ditu -du -dute 
-edo -egin -ere -eta -eurak -ez -gainera -gu -gutxi -guzti -haiei -haiek -haietan -hainbeste -hala -han -handik -hango -hara -hari -hark -hartan -hau -hauei -hauek -hauetan -hemen -hemendik -hemengo -hi -hona -honek -honela -honetan -honi -hor -hori -horiei -horiek -horietan -horko -horra -horrek -horrela -horretan -horri -hortik -hura -izan -ni -noiz -nola -non -nondik -nongo -nor -nora -ze -zein -zen -zenbait -zenbat -zer -zergatik -ziren -zituen -zu -zuek -zuen -zuten diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_fa.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_fa.txt deleted file mode 100644 index 723641c6..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_fa.txt +++ /dev/null @@ -1,313 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# Also see http://www.opensource.org/licenses/bsd-license.html -# Note: by default this file is used after normalization, so when adding entries -# to this file, use the arabic 'ي' instead of 'ی' -انان -نداشته -سراسر -خياه -ايشان -وي -تاكنون -بيشتري -دوم -پس -ناشي -وگو -يا -داشتند -سپس -هنگام -هرگز -پنج -نشان -امسال -ديگر -گروهي -شدند -چطور -ده -و -دو -نخستين -ولي -چرا -چه -وسط -ه -كدام -قابل -يك -رفت -هفت -همچنين -در -هزار -بله -بلي -شايد -اما -شناسي -گرفته -دهد -داشته -دانست -داشتن -خواهيم -ميليارد -وقتيكه -امد -خواهد -جز -اورده -شده -بلكه -خدمات -شدن -برخي -نبود -بسياري -جلوگيري -حق -كردند -نوعي -بعري -نكرده -نظير -نبايد -بوده -بودن -داد -اورد -هست -جايي -شود -دنبال -داده -بايد -سابق -هيچ -همان -انجا -كمتر -كجاست -گردد -كسي -تر -مردم -تان -دادن -بودند -سري -جدا -ندارند -مگر -يكديگر -دارد -دهند -بنابراين -هنگامي -سمت -جا -انچه -خود -دادند -زياد -دارند -اثر -بدون -بهترين -بيشتر -البته -به -براساس -بيرون -كرد -بعضي -گرفت -توي -اي -ميليون -او -جريان -تول -بر -مانند -برابر -باشيم -مدتي -گويند -اكنون -تا -تنها -جديد -چند -بي -نشده -كردن -كردم 
-گويد -كرده -كنيم -نمي -نزد -روي -قصد -فقط -بالاي -ديگران -اين -ديروز -توسط -سوم -ايم -دانند -سوي -استفاده -شما -كنار -داريم -ساخته -طور -امده -رفته -نخست -بيست -نزديك -طي -كنيد -از -انها -تمامي -داشت -يكي -طريق -اش -چيست -روب -نمايد -گفت -چندين -چيزي -تواند -ام -ايا -با -ان -ايد -ترين -اينكه -ديگري -راه -هايي -بروز -همچنان -پاعين -كس -حدود -مختلف -مقابل -چيز -گيرد -ندارد -ضد -همچون -سازي -شان -مورد -باره -مرسي -خويش -برخوردار -چون -خارج -شش -هنوز -تحت -ضمن -هستيم -گفته -فكر -بسيار -پيش -براي -روزهاي -انكه -نخواهد -بالا -كل -وقتي -كي -چنين -كه -گيري -نيست -است -كجا -كند -نيز -يابد -بندي -حتي -توانند -عقب -خواست -كنند -بين -تمام -همه -ما -باشند -مثل -شد -اري -باشد -اره -طبق -بعد -اگر -صورت -غير -جاي -بيش -ريزي -اند -زيرا -چگونه -بار -لطفا -مي -درباره -من -ديده -همين -گذاري -برداري -علت -گذاشته -هم -فوق -نه -ها -شوند -اباد -همواره -هر -اول -خواهند -چهار -نام -امروز -مان -هاي -قبل -كنم -سعي -تازه -را -هستند -زير -جلوي -عنوان -بود diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_fi.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_fi.txt deleted file mode 100644 index 4372c9a0..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_fi.txt +++ /dev/null @@ -1,97 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. 
- | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| forms of BE - -olla -olen -olet -on -olemme -olette -ovat -ole | negative form - -oli -olisi -olisit -olisin -olisimme -olisitte -olisivat -olit -olin -olimme -olitte -olivat -ollut -olleet - -en | negation -et -ei -emme -ette -eivät - -|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans -minä minun minut minua minussa minusta minuun minulla minulta minulle | I -sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you -hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she -me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we -te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you -he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they - -tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this -tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that -se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it -nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these -nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those -ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they - -kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who -ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) -mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what -mitkä | (pl) - -joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which -jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) - -| conjunctions - -että | that -ja | and -jos | if -koska | because -kuin | than -mutta | but -niin | so -sekä | and -sillä | for -tai | or -vaan | but -vai | or -vaikka | although - - -| prepositions - -kanssa | with -mukaan | according to -noin | about 
-poikki | across -yli | over, across - -| other - -kun | when -niin | so -nyt | now -itse | self - diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_fr.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_fr.txt deleted file mode 100644 index 749abae6..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_fr.txt +++ /dev/null @@ -1,186 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A French stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - -au | a + le -aux | a + les -avec | with -ce | this -ces | these -dans | with -de | of -des | de + les -du | de + le -elle | she -en | `of them' etc -et | and -eux | them -il | he -je | I -la | the -le | the -leur | their -lui | him -ma | my (fem) -mais | but -me | me -même | same; as in moi-même (myself) etc -mes | me (pl) -moi | me -mon | my (masc) -ne | not -nos | our (pl) -notre | our -nous | we -on | one -ou | where -par | by -pas | not -pour | for -qu | que before vowel -que | that -qui | who -sa | his, her (fem) -se | oneself -ses | his (pl) -son | his, her (masc) -sur | on -ta | thy (fem) -te | thee -tes | thy (pl) -toi | thee -ton | thy (masc) -tu | thou -un | a -une | a -vos | your (pl) -votre | your -vous | you - - | single letter forms - -c | c' -d | d' -j | j' -l | l' -à | to, at -m | m' -n | n' -s | s' -t | t' -y | there - - | forms of être (not including the infinitive): -été -étée -étées -étés -étant -suis -es -est -sommes -êtes -sont -serai -seras -sera -serons -serez -seront -serais -serait -serions -seriez -seraient -étais -était 
-étions -étiez -étaient -fus -fut -fûmes -fûtes -furent -sois -soit -soyons -soyez -soient -fusse -fusses -fût -fussions -fussiez -fussent - - | forms of avoir (not including the infinitive): -ayant -eu -eue -eues -eus -ai -as -avons -avez -ont -aurai -auras -aura -aurons -aurez -auront -aurais -aurait -aurions -auriez -auraient -avais -avait -avions -aviez -avaient -eut -eûmes -eûtes -eurent -aie -aies -ait -ayons -ayez -aient -eusse -eusses -eût -eussions -eussiez -eussent - - | Later additions (from Jean-Christophe Deschamps) -ceci | this -cela | that -celà | that -cet | this -cette | this -ici | here -ils | they -les | the (pl) -leurs | their (pl) -quel | which -quels | which -quelle | which -quelles | which -sans | without -soi | oneself - diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ga.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ga.txt deleted file mode 100644 index 9ff88d74..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ga.txt +++ /dev/null @@ -1,110 +0,0 @@ - -a -ach -ag -agus -an -aon -ar -arna -as -b' -ba -beirt -bhúr -caoga -ceathair -ceathrar -chomh -chtó -chuig -chun -cois -céad -cúig -cúigear -d' -daichead -dar -de -deich -deichniúr -den -dhá -do -don -dtí -dá -dár -dó -faoi -faoin -faoina -faoinár -fara -fiche -gach -gan -go -gur -haon -hocht -i -iad -idir -in -ina -ins -inár -is -le -leis -lena -lenár -m' -mar -mo -mé -na -nach -naoi -naonúr -ná -ní -níor -nó -nócha -ocht -ochtar -os -roimh -sa -seacht -seachtar -seachtó -seasca -seisear -siad -sibh -sinn -sna -sé -sí -tar -thar -thú -triúr -trí -trína -trínár -tríocha -tú -um -ár -é -éis -í -ó -ón -óna -ónár diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_gl.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_gl.txt deleted file mode 100644 index d8760b12..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_gl.txt +++ /dev/null @@ 
-1,161 +0,0 @@ -# galican stopwords -a -aínda -alí -aquel -aquela -aquelas -aqueles -aquilo -aquí -ao -aos -as -así -á -ben -cando -che -co -coa -comigo -con -connosco -contigo -convosco -coas -cos -cun -cuns -cunha -cunhas -da -dalgunha -dalgunhas -dalgún -dalgúns -das -de -del -dela -delas -deles -desde -deste -do -dos -dun -duns -dunha -dunhas -e -el -ela -elas -eles -en -era -eran -esa -esas -ese -eses -esta -estar -estaba -está -están -este -estes -estiven -estou -eu -é -facer -foi -foron -fun -había -hai -iso -isto -la -las -lle -lles -lo -los -mais -me -meu -meus -min -miña -miñas -moi -na -nas -neste -nin -no -non -nos -nosa -nosas -noso -nosos -nós -nun -nunha -nuns -nunhas -o -os -ou -ó -ós -para -pero -pode -pois -pola -polas -polo -polos -por -que -se -senón -ser -seu -seus -sexa -sido -sobre -súa -súas -tamén -tan -te -ten -teñen -teño -ter -teu -teus -ti -tido -tiña -tiven -túa -túas -un -unha -unhas -uns -vos -vosa -vosas -voso -vosos -vós diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_hi.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_hi.txt deleted file mode 100644 index 86286bb0..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_hi.txt +++ /dev/null @@ -1,235 +0,0 @@ -# Also see http://www.opensource.org/licenses/bsd-license.html -# See http://members.unine.ch/jacques.savoy/clef/index.html. -# This file was created by Jacques Savoy and is distributed under the BSD license. -# Note: by default this file also contains forms normalized by HindiNormalizer -# for spelling variation (see section below), such that it can be used whether or -# not you enable that feature. When adding additional entries to this list, -# please add the normalized form as well. 
-अंदर -अत -अपना -अपनी -अपने -अभी -आदि -आप -इत्यादि -इन -इनका -इन्हीं -इन्हें -इन्हों -इस -इसका -इसकी -इसके -इसमें -इसी -इसे -उन -उनका -उनकी -उनके -उनको -उन्हीं -उन्हें -उन्हों -उस -उसके -उसी -उसे -एक -एवं -एस -ऐसे -और -कई -कर -करता -करते -करना -करने -करें -कहते -कहा -का -काफ़ी -कि -कितना -किन्हें -किन्हों -किया -किर -किस -किसी -किसे -की -कुछ -कुल -के -को -कोई -कौन -कौनसा -गया -घर -जब -जहाँ -जा -जितना -जिन -जिन्हें -जिन्हों -जिस -जिसे -जीधर -जैसा -जैसे -जो -तक -तब -तरह -तिन -तिन्हें -तिन्हों -तिस -तिसे -तो -था -थी -थे -दबारा -दिया -दुसरा -दूसरे -दो -द्वारा -न -नहीं -ना -निहायत -नीचे -ने -पर -पर -पहले -पूरा -पे -फिर -बनी -बही -बहुत -बाद -बाला -बिलकुल -भी -भीतर -मगर -मानो -मे -में -यदि -यह -यहाँ -यही -या -यिह -ये -रखें -रहा -रहे -ऱ्वासा -लिए -लिये -लेकिन -व -वर्ग -वह -वह -वहाँ -वहीं -वाले -वुह -वे -वग़ैरह -संग -सकता -सकते -सबसे -सभी -साथ -साबुत -साभ -सारा -से -सो -ही -हुआ -हुई -हुए -है -हैं -हो -होता -होती -होते -होना -होने -# additional normalized forms of the above -अपनि -जेसे -होति -सभि -तिंहों -इंहों -दवारा -इसि -किंहें -थि -उंहों -ओर -जिंहें -वहिं -अभि -बनि -हि -उंहिं -उंहें -हें -वगेरह -एसे -रवासा -कोन -निचे -काफि -उसि -पुरा -भितर -हे -बहि -वहां -कोइ -यहां -जिंहों -तिंहें -किसि -कइ -यहि -इंहिं -जिधर -इंहें -अदि -इतयादि -हुइ -कोनसा -इसकि -दुसरे -जहां -अप -किंहों -उनकि -भि -वरग -हुअ -जेसा -नहिं diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_hu.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_hu.txt deleted file mode 100644 index 37526da8..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_hu.txt +++ /dev/null @@ -1,211 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. 
- | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - -| Hungarian stop word list -| prepared by Anna Tordai - -a -ahogy -ahol -aki -akik -akkor -alatt -által -általában -amely -amelyek -amelyekben -amelyeket -amelyet -amelynek -ami -amit -amolyan -amíg -amikor -át -abban -ahhoz -annak -arra -arról -az -azok -azon -azt -azzal -azért -aztán -azután -azonban -bár -be -belül -benne -cikk -cikkek -cikkeket -csak -de -e -eddig -egész -egy -egyes -egyetlen -egyéb -egyik -egyre -ekkor -el -elég -ellen -elő -először -előtt -első -én -éppen -ebben -ehhez -emilyen -ennek -erre -ez -ezt -ezek -ezen -ezzel -ezért -és -fel -felé -hanem -hiszen -hogy -hogyan -igen -így -illetve -ill. -ill -ilyen -ilyenkor -ison -ismét -itt -jó -jól -jobban -kell -kellett -keresztül -keressünk -ki -kívül -között -közül -legalább -lehet -lehetett -legyen -lenne -lenni -lesz -lett -maga -magát -majd -majd -már -más -másik -meg -még -mellett -mert -mely -melyek -mi -mit -míg -miért -milyen -mikor -minden -mindent -mindenki -mindig -mint -mintha -mivel -most -nagy -nagyobb -nagyon -ne -néha -nekem -neki -nem -néhány -nélkül -nincs -olyan -ott -össze -ő -ők -őket -pedig -persze -rá -s -saját -sem -semmi -sok -sokat -sokkal -számára -szemben -szerint -szinte -talán -tehát -teljes -tovább -továbbá -több -úgy -ugyanis -új -újabb -újra -után -utána -utolsó -vagy -vagyis -valaki -valami -valamint -való -vagyok -van -vannak -volt -voltam -voltak -voltunk -vissza -vele -viszont -volna diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_hy.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_hy.txt deleted file mode 100644 index 60c1c50f..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_hy.txt +++ /dev/null @@ -1,46 +0,0 @@ -# example set of Armenian stopwords. 
-այդ -այլ -այն -այս -դու -դուք -եմ -են -ենք -ես -եք -է -էի -էին -էինք -էիր -էիք -էր -ըստ -թ -ի -ին -իսկ -իր -կամ -համար -հետ -հետո -մենք -մեջ -մի -ն -նա -նաև -նրա -նրանք -որ -որը -որոնք -որպես -ու -ում -պիտի -վրա -և diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_id.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_id.txt deleted file mode 100644 index 4617f83a..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_id.txt +++ /dev/null @@ -1,359 +0,0 @@ -# from appendix D of: A Study of Stemming Effects on Information -# Retrieval in Bahasa Indonesia -ada -adanya -adalah -adapun -agak -agaknya -agar -akan -akankah -akhirnya -aku -akulah -amat -amatlah -anda -andalah -antar -diantaranya -antara -antaranya -diantara -apa -apaan -mengapa -apabila -apakah -apalagi -apatah -atau -ataukah -ataupun -bagai -bagaikan -sebagai -sebagainya -bagaimana -bagaimanapun -sebagaimana -bagaimanakah -bagi -bahkan -bahwa -bahwasanya -sebaliknya -banyak -sebanyak -beberapa -seberapa -begini -beginian -beginikah -beginilah -sebegini -begitu -begitukah -begitulah -begitupun -sebegitu -belum -belumlah -sebelum -sebelumnya -sebenarnya -berapa -berapakah -berapalah -berapapun -betulkah -sebetulnya -biasa -biasanya -bila -bilakah -bisa -bisakah -sebisanya -boleh -bolehkah -bolehlah -buat -bukan -bukankah -bukanlah -bukannya -cuma -percuma -dahulu -dalam -dan -dapat -dari -daripada -dekat -demi -demikian -demikianlah -sedemikian -dengan -depan -di -dia -dialah -dini -diri -dirinya -terdiri -dong -dulu -enggak -enggaknya -entah -entahlah -terhadap -terhadapnya -hal -hampir -hanya -hanyalah -harus -haruslah -harusnya -seharusnya -hendak -hendaklah -hendaknya -hingga -sehingga -ia -ialah -ibarat -ingin -inginkah -inginkan -ini -inikah -inilah -itu -itukah -itulah -jangan -jangankan -janganlah -jika -jikalau -juga -justru -kala -kalau -kalaulah -kalaupun -kalian -kami -kamilah -kamu -kamulah -kan -kapan -kapankah -kapanpun 
-dikarenakan -karena -karenanya -ke -kecil -kemudian -kenapa -kepada -kepadanya -ketika -seketika -khususnya -kini -kinilah -kiranya -sekiranya -kita -kitalah -kok -lagi -lagian -selagi -lah -lain -lainnya -melainkan -selaku -lalu -melalui -terlalu -lama -lamanya -selama -selama -selamanya -lebih -terlebih -bermacam -macam -semacam -maka -makanya -makin -malah -malahan -mampu -mampukah -mana -manakala -manalagi -masih -masihkah -semasih -masing -mau -maupun -semaunya -memang -mereka -merekalah -meski -meskipun -semula -mungkin -mungkinkah -nah -namun -nanti -nantinya -nyaris -oleh -olehnya -seorang -seseorang -pada -padanya -padahal -paling -sepanjang -pantas -sepantasnya -sepantasnyalah -para -pasti -pastilah -per -pernah -pula -pun -merupakan -rupanya -serupa -saat -saatnya -sesaat -saja -sajalah -saling -bersama -sama -sesama -sambil -sampai -sana -sangat -sangatlah -saya -sayalah -se -sebab -sebabnya -sebuah -tersebut -tersebutlah -sedang -sedangkan -sedikit -sedikitnya -segala -segalanya -segera -sesegera -sejak -sejenak -sekali -sekalian -sekalipun -sesekali -sekaligus -sekarang -sekarang -sekitar -sekitarnya -sela -selain -selalu -seluruh -seluruhnya -semakin -sementara -sempat -semua -semuanya -sendiri -sendirinya -seolah -seperti -sepertinya -sering -seringnya -serta -siapa -siapakah -siapapun -disini -disinilah -sini -sinilah -sesuatu -sesuatunya -suatu -sesudah -sesudahnya -sudah -sudahkah -sudahlah -supaya -tadi -tadinya -tak -tanpa -setelah -telah -tentang -tentu -tentulah -tentunya -tertentu -seterusnya -tapi -tetapi -setiap -tiap -setidaknya -tidak -tidakkah -tidaklah -toh -waduh -wah -wahai -sewaktu -walau -walaupun -wong -yaitu -yakni -yang diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_it.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_it.txt deleted file mode 100644 index 1219cc77..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_it.txt +++ /dev/null @@ -1,303 +0,0 
@@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | An Italian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - -ad | a (to) before vowel -al | a + il -allo | a + lo -ai | a + i -agli | a + gli -all | a + l' -agl | a + gl' -alla | a + la -alle | a + le -con | with -col | con + il -coi | con + i (forms collo, cogli etc are now very rare) -da | from -dal | da + il -dallo | da + lo -dai | da + i -dagli | da + gli -dall | da + l' -dagl | da + gll' -dalla | da + la -dalle | da + le -di | of -del | di + il -dello | di + lo -dei | di + i -degli | di + gli -dell | di + l' -degl | di + gl' -della | di + la -delle | di + le -in | in -nel | in + el -nello | in + lo -nei | in + i -negli | in + gli -nell | in + l' -negl | in + gl' -nella | in + la -nelle | in + le -su | on -sul | su + il -sullo | su + lo -sui | su + i -sugli | su + gli -sull | su + l' -sugl | su + gl' -sulla | su + la -sulle | su + le -per | through, by -tra | among -contro | against -io | I -tu | thou -lui | he -lei | she -noi | we -voi | you -loro | they -mio | my -mia | -miei | -mie | -tuo | -tua | -tuoi | thy -tue | -suo | -sua | -suoi | his, her -sue | -nostro | our -nostra | -nostri | -nostre | -vostro | your -vostra | -vostri | -vostre | -mi | me -ti | thee -ci | us, there -vi | you, there -lo | him, the -la | her, the -li | them -le | them, the -gli | to him, the -ne | from there etc -il | the -un | a -uno | a -una | a -ma | but -ed | and -se | if -perché | why, because -anche | also -come | how -dov | where (as dov') -dove | where -che | who, that -chi | who -cui | whom -non | not -più | more -quale | 
who, that -quanto | how much -quanti | -quanta | -quante | -quello | that -quelli | -quella | -quelle | -questo | this -questi | -questa | -queste | -si | yes -tutto | all -tutti | all - - | single letter forms: - -a | at -c | as c' for ce or ci -e | and -i | the -l | as l' -o | or - - | forms of avere, to have (not including the infinitive): - -ho -hai -ha -abbiamo -avete -hanno -abbia -abbiate -abbiano -avrò -avrai -avrà -avremo -avrete -avranno -avrei -avresti -avrebbe -avremmo -avreste -avrebbero -avevo -avevi -aveva -avevamo -avevate -avevano -ebbi -avesti -ebbe -avemmo -aveste -ebbero -avessi -avesse -avessimo -avessero -avendo -avuto -avuta -avuti -avute - - | forms of essere, to be (not including the infinitive): -sono -sei -è -siamo -siete -sia -siate -siano -sarò -sarai -sarà -saremo -sarete -saranno -sarei -saresti -sarebbe -saremmo -sareste -sarebbero -ero -eri -era -eravamo -eravate -erano -fui -fosti -fu -fummo -foste -furono -fossi -fosse -fossimo -fossero -essendo - - | forms of fare, to do (not including the infinitive, fa, fat-): -faccio -fai -facciamo -fanno -faccia -facciate -facciano -farò -farai -farà -faremo -farete -faranno -farei -faresti -farebbe -faremmo -fareste -farebbero -facevo -facevi -faceva -facevamo -facevate -facevano -feci -facesti -fece -facemmo -faceste -fecero -facessi -facesse -facessimo -facessero -facendo - - | forms of stare, to be (not including the infinitive): -sto -stai -sta -stiamo -stanno -stia -stiate -stiano -starò -starai -starà -staremo -starete -staranno -starei -staresti -starebbe -staremmo -stareste -starebbero -stavo -stavi -stava -stavamo -stavate -stavano -stetti -stesti -stette -stemmo -steste -stettero -stessi -stesse -stessimo -stessero -stando diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ja.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ja.txt deleted file mode 100644 index d4321be6..00000000 --- 
a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ja.txt +++ /dev/null @@ -1,127 +0,0 @@ -# -# This file defines a stopword set for Japanese. -# -# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. -# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 -# for frequency lists, etc. that can be useful for making your own set (if desired) -# -# Note that there is an overlap between these stopwords and the terms stopped when used -# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note -# that comments are not allowed on the same line as stopwords. -# -# Also note that stopping is done in a case-insensitive manner. Change your StopFilter -# configuration if you need case-sensitive stopping. Lastly, note that stopping is done -# using the same character width as the entries in this file. Since this StopFilter is -# normally done after a CJKWidthFilter in your chain, you would usually want your romaji -# entries to be in half-width and your kana entries to be in full-width. 
-# -の -に -は -を -た -が -で -て -と -し -れ -さ -ある -いる -も -する -から -な -こと -として -い -や -れる -など -なっ -ない -この -ため -その -あっ -よう -また -もの -という -あり -まで -られ -なる -へ -か -だ -これ -によって -により -おり -より -による -ず -なり -られる -において -ば -なかっ -なく -しかし -について -せ -だっ -その後 -できる -それ -う -ので -なお -のみ -でき -き -つ -における -および -いう -さらに -でも -ら -たり -その他 -に関する -たち -ます -ん -なら -に対して -特に -せる -及び -これら -とき -では -にて -ほか -ながら -うち -そして -とともに -ただし -かつて -それぞれ -または -お -ほど -ものの -に対する -ほとんど -と共に -といった -です -とも -ところ -ここ -##### End of file diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_lv.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_lv.txt deleted file mode 100644 index e21a23c0..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_lv.txt +++ /dev/null @@ -1,172 +0,0 @@ -# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins -# the original list of over 800 forms was refined: -# pronouns, adverbs, interjections were removed -# -# prepositions -aiz -ap -ar -apakš -ārpus -augšpus -bez -caur -dēļ -gar -iekš -iz -kopš -labad -lejpus -līdz -no -otrpus -pa -par -pār -pēc -pie -pirms -pret -priekš -starp -šaipus -uz -viņpus -virs -virspus -zem -apakšpus -# Conjunctions -un -bet -jo -ja -ka -lai -tomēr -tikko -turpretī -arī -kaut -gan -tādēļ -tā -ne -tikvien -vien -kā -ir -te -vai -kamēr -# Particles -ar -diezin -droši -diemžēl -nebūt -ik -it -taču -nu -pat -tiklab -iekšpus -nedz -tik -nevis -turpretim -jeb -iekam -iekām -iekāms -kolīdz -līdzko -tiklīdz -jebšu -tālab -tāpēc -nekā -itin -jā -jau -jel -nē -nezin -tad -tikai -vis -tak -iekams -vien -# modal verbs -būt -biju -biji -bija -bijām -bijāt -esmu -esi -esam -esat -būšu -būsi -būs -būsim -būsiet -tikt -tiku -tiki -tika -tikām -tikāt -tieku -tiec -tiek -tiekam -tiekat -tikšu -tiks -tiksim -tiksiet -tapt -tapi -tapāt -topat -tapšu -tapsi -taps -tapsim -tapsiet -kļūt -kļuvu -kļuvi -kļuva -kļuvām -kļuvāt -kļūstu -kļūsti -kļūst -kļūstam -kļūstat -kļūšu -kļūsi -kļūs -kļūsim -kļūsiet -# 
verbs -varēt -varēju -varējām -varēšu -varēsim -var -varēji -varējāt -varēsi -varēsiet -varat -varēja -varēs diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_nl.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_nl.txt deleted file mode 100644 index 47a2aeac..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_nl.txt +++ /dev/null @@ -1,119 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Dutch stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large sample of Dutch text. - - | Dutch stop words frequently exhibit homonym clashes. These are indicated - | clearly below. - -de | the -en | and -van | of, from -ik | I, the ego -te | (1) chez, at etc, (2) to, (3) too -dat | that, which -die | that, those, who, which -in | in, inside -een | a, an, one -hij | he -het | the, it -niet | not, nothing, naught -zijn | (1) to be, being, (2) his, one's, its -is | is -was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river -op | on, upon, at, in, up, used up -aan | on, upon, to (as dative) -met | with, by -als | like, such as, when -voor | (1) before, in front of, (2) furrow -had | had, past tense all persons sing. of 'hebben' (have) -er | there -maar | but, only -om | round, about, for etc -hem | him -dan | then -zou | should/would, past tense all persons sing. 
of 'zullen' -of | or, whether, if -wat | what, something, anything -mijn | possessive and noun 'mine' -men | people, 'one' -dit | this -zo | so, thus, in this way -door | through by -over | over, across -ze | she, her, they, them -zich | oneself -bij | (1) a bee, (2) by, near, at -ook | also, too -tot | till, until -je | you -mij | me -uit | out of, from -der | Old Dutch form of 'van der' still found in surnames -daar | (1) there, (2) because -haar | (1) her, their, them, (2) hair -naar | (1) unpleasant, unwell etc, (2) towards, (3) as -heb | present first person sing. of 'to have' -hoe | how, why -heeft | present third person sing. of 'to have' -hebben | 'to have' and various parts thereof -deze | this -u | you -want | (1) for, (2) mitten, (3) rigging -nog | yet, still -zal | 'shall', first and third person sing. of verb 'zullen' (will) -me | me -zij | she, they -nu | now -ge | 'thou', still used in Belgium and south Netherlands -geen | none -omdat | because -iets | something, somewhat -worden | to become, grow, get -toch | yet, still -al | all, every, each -waren | (1) 'were' (2) to wander, (3) wares, (3) -veel | much, many -meer | (1) more, (2) lake -doen | to do, to make -toen | then, when -moet | noun 'spot/mote' and present form of 'to must' -ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' -zonder | without -kan | noun 'can' and present form of 'to be able' -hun | their, them -dus | so, consequently -alles | all, everything, anything -onder | under, beneath -ja | yes, of course -eens | once, one day -hier | here -wie | who -werd | imperfect third person sing. of 'become' -altijd | always -doch | yet, but etc -wordt | present third person sing. 
of 'become' -wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans -kunnen | to be able -ons | us/our -zelf | self -tegen | against, towards, at -na | after, near -reeds | already -wil | (1) present tense of 'want', (2) 'will', noun, (3) fender -kon | could; past tense of 'to be able' -niets | nothing -uw | your -iemand | somebody -geweest | been; past participle of 'be' -andere | other diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_no.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_no.txt deleted file mode 100644 index a7a2c28b..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_no.txt +++ /dev/null @@ -1,194 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Norwegian stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This stop word list is for the dominant bokmål dialect. Words unique - | to nynorsk are marked *. - - | Revised by Jan Bruusgaard , Jan 2005 - -og | and -i | in -jeg | I -det | it/this/that -at | to (w. inf.) 
-en | a/an -et | a/an -den | it/this/that -til | to -er | is/am/are -som | who/that -på | on -de | they / you(formal) -med | with -han | he -av | of -ikke | not -ikkje | not * -der | there -så | so -var | was/were -meg | me -seg | you -men | but -ett | one -har | have -om | about -vi | we -min | my -mitt | my -ha | have -hadde | had -hun | she -nå | now -over | over -da | when/as -ved | by/know -fra | from -du | you -ut | out -sin | your -dem | them -oss | us -opp | up -man | you/one -kan | can -hans | his -hvor | where -eller | or -hva | what -skal | shall/must -selv | self (reflective) -sjøl | self (reflective) -her | here -alle | all -vil | will -bli | become -ble | became -blei | became * -blitt | have become -kunne | could -inn | in -når | when -være | be -kom | come -noen | some -noe | some -ville | would -dere | you -som | who/which/that -deres | their/theirs -kun | only/just -ja | yes -etter | after -ned | down -skulle | should -denne | this -for | for/because -deg | you -si | hers/his -sine | hers/his -sitt | hers/his -mot | against -å | to -meget | much -hvorfor | why -dette | this -disse | these/those -uten | without -hvordan | how -ingen | none -din | your -ditt | your -blir | become -samme | same -hvilken | which -hvilke | which (plural) -sånn | such a -inni | inside/within -mellom | between -vår | our -hver | each -hvem | who -vors | us/ours -hvis | whose -både | both -bare | only/just -enn | than -fordi | as/because -før | before -mange | many -også | also -slik | just -vært | been -være | to be -båe | both * -begge | both -siden | since -dykk | your * -dykkar | yours * -dei | they * -deira | them * -deires | theirs * -deim | them * -di | your (fem.) 
* -då | as/when * -eg | I * -ein | a/an * -eit | a/an * -eitt | a/an * -elles | or * -honom | he * -hjå | at * -ho | she * -hoe | she * -henne | her -hennar | her/hers -hennes | hers -hoss | how * -hossen | how * -ikkje | not * -ingi | noone * -inkje | noone * -korleis | how * -korso | how * -kva | what/which * -kvar | where * -kvarhelst | where * -kven | who/whom * -kvi | why * -kvifor | why * -me | we * -medan | while * -mi | my * -mine | my * -mykje | much * -no | now * -nokon | some (masc./neut.) * -noka | some (fem.) * -nokor | some * -noko | some * -nokre | some * -si | his/hers * -sia | since * -sidan | since * -so | so * -somt | some * -somme | some * -um | about* -upp | up * -vere | be * -vore | was * -verte | become * -vort | become * -varte | became * -vart | became * - diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_pt.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_pt.txt deleted file mode 100644 index acfeb01a..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_pt.txt +++ /dev/null @@ -1,253 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt - | This file is distributed under the BSD License. - | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Portuguese stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - - | The following is a ranked list (commonest to rarest) of stopwords - | deriving from a large sample of text. - - | Extra words have been added at the end. 
- -de | of, from -a | the; to, at; her -o | the; him -que | who, that -e | and -do | de + o -da | de + a -em | in -um | a -para | for - | é from SER -com | with -não | not, no -uma | a -os | the; them -no | em + o -se | himself etc -na | em + a -por | for -mais | more -as | the; them -dos | de + os -como | as, like -mas | but - | foi from SER -ao | a + o -ele | he -das | de + as - | tem from TER -à | a + a -seu | his -sua | her -ou | or - | ser from SER -quando | when -muito | much - | há from HAV -nos | em + os; us -já | already, now - | está from EST -eu | I -também | also -só | only, just -pelo | per + o -pela | per + a -até | up to -isso | that -ela | he -entre | between - | era from SER -depois | after -sem | without -mesmo | same -aos | a + os - | ter from TER -seus | his -quem | whom -nas | em + as -me | me -esse | that -eles | they - | estão from EST -você | you - | tinha from TER - | foram from SER -essa | that -num | em + um -nem | nor -suas | her -meu | my -às | a + as -minha | my - | têm from TER -numa | em + uma -pelos | per + os -elas | they - | havia from HAV - | seja from SER -qual | which - | será from SER -nós | we - | tenho from TER -lhe | to him, her -deles | of them -essas | those -esses | those -pelas | per + as -este | this - | fosse from SER -dele | of him - - | other words. There are many contractions such as naquele = em+aquele, - | mo = me+o, but they are rare. - | Indefinite article plural forms are also rare. 
- -tu | thou -te | thee -vocês | you (plural) -vos | you -lhes | to them -meus | my -minhas -teu | thy -tua -teus -tuas -nosso | our -nossa -nossos -nossas - -dela | of her -delas | of them - -esta | this -estes | these -estas | these -aquele | that -aquela | that -aqueles | those -aquelas | those -isto | this -aquilo | that - - | forms of estar, to be (not including the infinitive): -estou -está -estamos -estão -estive -esteve -estivemos -estiveram -estava -estávamos -estavam -estivera -estivéramos -esteja -estejamos -estejam -estivesse -estivéssemos -estivessem -estiver -estivermos -estiverem - - | forms of haver, to have (not including the infinitive): -hei -há -havemos -hão -houve -houvemos -houveram -houvera -houvéramos -haja -hajamos -hajam -houvesse -houvéssemos -houvessem -houver -houvermos -houverem -houverei -houverá -houveremos -houverão -houveria -houveríamos -houveriam - - | forms of ser, to be (not including the infinitive): -sou -somos -são -era -éramos -eram -fui -foi -fomos -foram -fora -fôramos -seja -sejamos -sejam -fosse -fôssemos -fossem -for -formos -forem -serei -será -seremos -serão -seria -seríamos -seriam - - | forms of ter, to have (not including the infinitive): -tenho -tem -temos -tém -tinha -tínhamos -tinham -tive -teve -tivemos -tiveram -tivera -tivéramos -tenha -tenhamos -tenham -tivesse -tivéssemos -tivessem -tiver -tivermos -tiverem -terei -terá -teremos -terão -teria -teríamos -teriam diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ro.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ro.txt deleted file mode 100644 index 4fdee90a..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ro.txt +++ /dev/null @@ -1,233 +0,0 @@ -# This file was created by Jacques Savoy and is distributed under the BSD license. -# See http://members.unine.ch/jacques.savoy/clef/index.html. 
-# Also see http://www.opensource.org/licenses/bsd-license.html -acea -aceasta -această -aceea -acei -aceia -acel -acela -acele -acelea -acest -acesta -aceste -acestea -aceşti -aceştia -acolo -acum -ai -aia -aibă -aici -al -ăla -ale -alea -ălea -altceva -altcineva -am -ar -are -aş -aşadar -asemenea -asta -ăsta -astăzi -astea -ăstea -ăştia -asupra -aţi -au -avea -avem -aveţi -azi -bine -bucur -bună -ca -că -căci -când -care -cărei -căror -cărui -cât -câte -câţi -către -câtva -ce -cel -ceva -chiar -cînd -cine -cineva -cît -cîte -cîţi -cîtva -contra -cu -cum -cumva -curând -curînd -da -dă -dacă -dar -datorită -de -deci -deja -deoarece -departe -deşi -din -dinaintea -dintr -dintre -drept -după -ea -ei -el -ele -eram -este -eşti -eu -face -fără -fi -fie -fiecare -fii -fim -fiţi -iar -ieri -îi -îl -îmi -împotriva -în -înainte -înaintea -încât -încît -încotro -între -întrucât -întrucît -îţi -la -lângă -le -li -lîngă -lor -lui -mă -mâine -mea -mei -mele -mereu -meu -mi -mine -mult -multă -mulţi -ne -nicăieri -nici -nimeni -nişte -noastră -noastre -noi -noştri -nostru -nu -ori -oricând -oricare -oricât -orice -oricînd -oricine -oricît -oricum -oriunde -până -pe -pentru -peste -pînă -poate -pot -prea -prima -primul -prin -printr -sa -să -săi -sale -sau -său -se -şi -sînt -sîntem -sînteţi -spre -sub -sunt -suntem -sunteţi -ta -tăi -tale -tău -te -ţi -ţie -tine -toată -toate -tot -toţi -totuşi -tu -un -una -unde -undeva -unei -unele -uneori -unor -vă -vi -voastră -voastre -voi -voştri -vostru -vouă -vreo -vreun diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ru.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ru.txt deleted file mode 100644 index 55271400..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_ru.txt +++ /dev/null @@ -1,243 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt - | This file is distributed under the BSD License. 
- | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | a russian stop word list. comments begin with vertical bar. each stop - | word is at the start of a line. - - | this is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | letter `ё' is translated to `е'. - -и | and -в | in/into -во | alternative form -не | not -что | what/that -он | he -на | on/onto -я | i -с | from -со | alternative form -как | how -а | milder form of `no' (but) -то | conjunction and form of `that' -все | all -она | she -так | so, thus -его | him -но | but -да | yes/and -ты | thou -к | towards, by -у | around, chez -же | intensifier particle -вы | you -за | beyond, behind -бы | conditional/subj. particle -по | up to, along -только | only -ее | her -мне | to me -было | it was -вот | here is/are, particle -от | away from -меня | me -еще | still, yet, more -нет | no, there isnt/arent -о | about -из | out of -ему | to him -теперь | now -когда | when -даже | even -ну | so, well -вдруг | suddenly -ли | interrogative particle -если | if -уже | already, but homonym of `narrower' -или | or -ни | neither -быть | to be -был | he was -него | prepositional form of его -до | up to -вас | you accusative -нибудь | indef. 
suffix preceded by hyphen -опять | again -уж | already, but homonym of `adder' -вам | to you -сказал | he said -ведь | particle `after all' -там | there -потом | then -себя | oneself -ничего | nothing -ей | to her -может | usually with `быть' as `maybe' -они | they -тут | here -где | where -есть | there is/are -надо | got to, must -ней | prepositional form of ей -для | for -мы | we -тебя | thee -их | them, their -чем | than -была | she was -сам | self -чтоб | in order to -без | without -будто | as if -человек | man, person, one -чего | genitive form of `what' -раз | once -тоже | also -себе | to oneself -под | beneath -жизнь | life -будет | will be -ж | short form of intensifer particle `же' -тогда | then -кто | who -этот | this -говорил | was saying -того | genitive form of `that' -потому | for that reason -этого | genitive form of `this' -какой | which -совсем | altogether -ним | prepositional form of `его', `они' -здесь | here -этом | prepositional form of `этот' -один | one -почти | almost -мой | my -тем | instrumental/dative plural of `тот', `то' -чтобы | full form of `in order that' -нее | her (acc.) -кажется | it seems -сейчас | now -были | they were -куда | where to -зачем | why -сказать | to say -всех | all (acc., gen. preposn. plural) -никогда | never -сегодня | today -можно | possible, one can -при | by -наконец | finally -два | two -об | alternative form of `о', about -другой | another -хоть | even -после | after -над | above -больше | more -тот | that one (masc.) -через | across, in -эти | these -нас | us -про | about -всего | in all, only, of all -них | prepositional form of `они' (they) -какая | which, feminine -много | lots -разве | interrogative particle -сказала | she said -три | three -эту | this, acc. fem. sing. -моя | my, feminine -впрочем | moreover, besides -хорошо | good -свою | ones own, acc. fem. sing. -этой | oblique form of `эта', fem. `this' -перед | in front of -иногда | sometimes -лучше | better -чуть | a little -том | preposn. 
form of `that one' -нельзя | one must not -такой | such a one -им | to them -более | more -всегда | always -конечно | of course -всю | acc. fem. sing of `all' -между | between - - - | b: some paradigms - | - | personal pronouns - | - | я меня мне мной [мною] - | ты тебя тебе тобой [тобою] - | он его ему им [него, нему, ним] - | она ее эи ею [нее, нэи, нею] - | оно его ему им [него, нему, ним] - | - | мы нас нам нами - | вы вас вам вами - | они их им ими [них, ним, ними] - | - | себя себе собой [собою] - | - | demonstrative pronouns: этот (this), тот (that) - | - | этот эта это эти - | этого эты это эти - | этого этой этого этих - | этому этой этому этим - | этим этой этим [этою] этими - | этом этой этом этих - | - | тот та то те - | того ту то те - | того той того тех - | тому той тому тем - | тем той тем [тою] теми - | том той том тех - | - | determinative pronouns - | - | (a) весь (all) - | - | весь вся все все - | всего всю все все - | всего всей всего всех - | всему всей всему всем - | всем всей всем [всею] всеми - | всем всей всем всех - | - | (b) сам (himself etc) - | - | сам сама само сами - | самого саму само самих - | самого самой самого самих - | самому самой самому самим - | самим самой самим [самою] самими - | самом самой самом самих - | - | stems of verbs `to be', `to have', `to do' and modal - | - | быть бы буд быв есть суть - | име - | дел - | мог мож мочь - | уме - | хоч хот - | долж - | можн - | нужн - | нельзя - diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_sv.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_sv.txt deleted file mode 100644 index 096f87f6..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_sv.txt +++ /dev/null @@ -1,133 +0,0 @@ - | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt - | This file is distributed under the BSD License. 
- | See http://snowball.tartarus.org/license.php - | Also see http://www.opensource.org/licenses/bsd-license.html - | - Encoding was converted to UTF-8. - | - This notice was added. - | - | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" - - | A Swedish stop word list. Comments begin with vertical bar. Each stop - | word is at the start of a line. - - | This is a ranked list (commonest to rarest) of stopwords derived from - | a large text sample. - - | Swedish stop words occasionally exhibit homonym clashes. For example - | så = so, but also seed. These are indicated clearly below. - -och | and -det | it, this/that -att | to (with infinitive) -i | in, at -en | a -jag | I -hon | she -som | who, that -han | he -på | on -den | it, this/that -med | with -var | where, each -sig | him(self) etc -för | for -så | so (also: seed) -till | to -är | is -men | but -ett | a -om | if; around, about -hade | had -de | they, these/those -av | of -icke | not, no -mig | me -du | you -henne | her -då | then, when -sin | his -nu | now -har | have -inte | inte någon = no one -hans | his -honom | him -skulle | 'sake' -hennes | her -där | there -min | my -man | one (pronoun) -ej | nor -vid | at, by, on (also: vast) -kunde | could -något | some etc -från | from, off -ut | out -när | when -efter | after, behind -upp | up -vi | we -dem | them -vara | be -vad | what -över | over -än | than -dig | you -kan | can -sina | his -här | here -ha | have -mot | towards -alla | all -under | under (also: wonder) -någon | some etc -eller | or (else) -allt | all -mycket | much -sedan | since -ju | why -denna | this/that -själv | myself, yourself etc -detta | this/that -åt | to -utan | without -varit | was -hur | how -ingen | no -mitt | my -ni | you -bli | to be, become -blev | from bli -oss | us -din | thy -dessa | these/those -några | some etc -deras | their -blir | from bli -mina | my -samma | (the) same -vilken | who, that -er | you, your -sådan | such a -vår | our 
-blivit | from bli -dess | its -inom | within -mellan | between -sådant | such a -varför | why -varje | each -vilka | who, that -ditt | thy -vem | who -vilket | who, that -sitta | his -sådana | such a -vart | each -dina | thy -vars | whose -vårt | our -våra | our -ert | your -era | your -vilkas | whose - diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_th.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_th.txt deleted file mode 100644 index 07f0fabe..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_th.txt +++ /dev/null @@ -1,119 +0,0 @@ -# Thai stopwords from: -# "Opinion Detection in Thai Political News Columns -# Based on Subjectivity Analysis" -# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak -ไว้ -ไม่ -ไป -ได้ -ให้ -ใน -โดย -แห่ง -แล้ว -และ -แรก -แบบ -แต่ -เอง -เห็น -เลย -เริ่ม -เรา -เมื่อ -เพื่อ -เพราะ -เป็นการ -เป็น -เปิดเผย -เปิด -เนื่องจาก -เดียวกัน -เดียว -เช่น -เฉพาะ -เคย -เข้า -เขา -อีก -อาจ -อะไร -ออก -อย่าง -อยู่ -อยาก -หาก -หลาย -หลังจาก -หลัง -หรือ -หนึ่ง -ส่วน -ส่ง -สุด -สําหรับ -ว่า -วัน -ลง -ร่วม -ราย -รับ -ระหว่าง -รวม -ยัง -มี -มาก -มา -พร้อม -พบ -ผ่าน -ผล -บาง -น่า -นี้ -นํา -นั้น -นัก -นอกจาก -ทุก -ที่สุด -ที่ -ทําให้ -ทํา -ทาง -ทั้งนี้ -ทั้ง -ถ้า -ถูก -ถึง -ต้อง -ต่างๆ -ต่าง -ต่อ -ตาม -ตั้งแต่ -ตั้ง -ด้าน -ด้วย -ดัง -ซึ่ง -ช่วง -จึง -จาก -จัด -จะ -คือ -ความ -ครั้ง -คง -ขึ้น -ของ -ขอ -ขณะ -ก่อน -ก็ -การ -กับ -กัน -กว่า -กล่าว diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_tr.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_tr.txt deleted file mode 100644 index 84d9408d..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/stopwords_tr.txt +++ /dev/null @@ -1,212 +0,0 @@ -# Turkish stopwords from LUCENE-559 -# merged with the list from "Information Retrieval on Turkish Texts" -# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) -acaba -altmış -altı -ama -ancak -arada 
-aslında -ayrıca -bana -bazı -belki -ben -benden -beni -benim -beri -beş -bile -bin -bir -birçok -biri -birkaç -birkez -birşey -birşeyi -biz -bize -bizden -bizi -bizim -böyle -böylece -bu -buna -bunda -bundan -bunlar -bunları -bunların -bunu -bunun -burada -çok -çünkü -da -daha -dahi -de -defa -değil -diğer -diye -doksan -dokuz -dolayı -dolayısıyla -dört -edecek -eden -ederek -edilecek -ediliyor -edilmesi -ediyor -eğer -elli -en -etmesi -etti -ettiği -ettiğini -gibi -göre -halen -hangi -hatta -hem -henüz -hep -hepsi -her -herhangi -herkesin -hiç -hiçbir -için -iki -ile -ilgili -ise -işte -itibaren -itibariyle -kadar -karşın -katrilyon -kendi -kendilerine -kendini -kendisi -kendisine -kendisini -kez -ki -kim -kimden -kime -kimi -kimse -kırk -milyar -milyon -mu -mü -mı -nasıl -ne -neden -nedenle -nerde -nerede -nereye -niye -niçin -o -olan -olarak -oldu -olduğu -olduğunu -olduklarını -olmadı -olmadığı -olmak -olması -olmayan -olmaz -olsa -olsun -olup -olur -olursa -oluyor -on -ona -ondan -onlar -onlardan -onları -onların -onu -onun -otuz -oysa -öyle -pek -rağmen -sadece -sanki -sekiz -seksen -sen -senden -seni -senin -siz -sizden -sizi -sizin -şey -şeyden -şeyi -şeyler -şöyle -şu -şuna -şunda -şundan -şunları -şunu -tarafından -trilyon -tüm -üç -üzere -var -vardı -ve -veya -ya -yani -yapacak -yapılan -yapılması -yapıyor -yapmak -yaptı -yaptığı -yaptığını -yaptıkları -yedi -yerine -yetmiş -yine -yirmi -yoksa -yüz -zaten diff --git a/search-solr/bitnami/solr/server/solr/search/conf/lang/userdict_ja.txt b/search-solr/bitnami/solr/server/solr/search/conf/lang/userdict_ja.txt deleted file mode 100644 index 6f0368e4..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/lang/userdict_ja.txt +++ /dev/null @@ -1,29 +0,0 @@ -# -# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) -# -# Add entries to this file in order to override the statistical model in terms -# of segmentation, readings and part-of-speech tags. 
Notice that entries do -# not have weights since they are always used when found. This is by-design -# in order to maximize ease-of-use. -# -# Entries are defined using the following CSV format: -# , ... , ... , -# -# Notice that a single half-width space separates tokens and readings, and -# that the number tokens and readings must match exactly. -# -# Also notice that multiple entries with the same is undefined. -# -# Whitespace only lines are ignored. Comments are not allowed on entry lines. -# - -# Custom segmentation for kanji compounds -日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 -関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 - -# Custom segmentation for compound katakana -トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 -ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 - -# Custom reading for former sumo wrestler -朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/search-solr/bitnami/solr/server/solr/search/conf/protwords.txt b/search-solr/bitnami/solr/server/solr/search/conf/protwords.txt deleted file mode 100644 index 1dfc0abe..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/protwords.txt +++ /dev/null @@ -1,21 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# Use a protected word file to protect against the stemmer reducing two -# unrelated words to the same base word. - -# Some non-words that normally won't be encountered, -# just to test that they won't be stemmed. 
-dontstems -zwhacky - diff --git a/search-solr/bitnami/solr/server/solr/search/conf/stopwords.txt b/search-solr/bitnami/solr/server/solr/search/conf/stopwords.txt deleted file mode 100644 index ae1e83ee..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/stopwords.txt +++ /dev/null @@ -1,14 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/search-solr/bitnami/solr/server/solr/search/conf/synonyms.txt b/search-solr/bitnami/solr/server/solr/search/conf/synonyms.txt deleted file mode 100644 index eab4ee87..00000000 --- a/search-solr/bitnami/solr/server/solr/search/conf/synonyms.txt +++ /dev/null @@ -1,29 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -#----------------------------------------------------------------------- -#some test synonym mappings unlikely to appear in real input text -aaafoo => aaabar -bbbfoo => bbbfoo bbbbar -cccfoo => cccbar cccbaz -fooaaa,baraaa,bazaaa - -# Some synonym groups specific to this example -GB,gib,gigabyte,gigabytes -MB,mib,megabyte,megabytes -Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming -#after us won't split it into two words. - -# Synonym mappings can be used for spelling correction too -pixima => pixma - diff --git a/search-solr/bitnami/solr/server/solr/zoo.cfg b/search-solr/bitnami/solr/server/solr/zoo.cfg deleted file mode 100644 index 4ef8dcea..00000000 --- a/search-solr/bitnami/solr/server/solr/zoo.cfg +++ /dev/null @@ -1,37 +0,0 @@ -# The number of milliseconds of each tick -tickTime=2000 -# The number of ticks that the initial -# synchronization phase can take -initLimit=10 -# The number of ticks that can pass between -# sending a request and getting an acknowledgement -syncLimit=5 - -# the directory where the snapshot is stored. -# dataDir=/opt/zookeeper/data -# NOTE: Solr defaults the dataDir to /zoo_data - -# the address that embedded zookeeper will bind to -clientPortAddress=127.0.0.1 - -# the port at which the clients will connect -# clientPort=2181 -# NOTE: Solr sets this based on zkRun / zkHost params - -# the maximum number of client connections. -# increase this if you need to handle more clients -#maxClientCnxns=60 -# -# Be sure to read the maintenance section of the -# administrator guide before turning on autopurge. 
-# -# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance -# -# The number of snapshots to retain in dataDir -#autopurge.snapRetainCount=3 -# Purge task interval in hours -# Set to "0" to disable auto purge feature -#autopurge.purgeInterval=1 - -# Disable ZK AdminServer since we do not use it -admin.enableServer=false \ No newline at end of file diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/contractions_ca.txt b/search-solr/solr/business/conf/lang/contractions_ca.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/contractions_ca.txt rename to search-solr/solr/business/conf/lang/contractions_ca.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/contractions_fr.txt b/search-solr/solr/business/conf/lang/contractions_fr.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/contractions_fr.txt rename to search-solr/solr/business/conf/lang/contractions_fr.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/contractions_ga.txt b/search-solr/solr/business/conf/lang/contractions_ga.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/contractions_ga.txt rename to search-solr/solr/business/conf/lang/contractions_ga.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/contractions_it.txt b/search-solr/solr/business/conf/lang/contractions_it.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/contractions_it.txt rename to search-solr/solr/business/conf/lang/contractions_it.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/hyphenations_ga.txt b/search-solr/solr/business/conf/lang/hyphenations_ga.txt similarity index 100% rename from 
search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/hyphenations_ga.txt rename to search-solr/solr/business/conf/lang/hyphenations_ga.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stemdict_nl.txt b/search-solr/solr/business/conf/lang/stemdict_nl.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stemdict_nl.txt rename to search-solr/solr/business/conf/lang/stemdict_nl.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stoptags_ja.txt b/search-solr/solr/business/conf/lang/stoptags_ja.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stoptags_ja.txt rename to search-solr/solr/business/conf/lang/stoptags_ja.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_ar.txt b/search-solr/solr/business/conf/lang/stopwords_ar.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_ar.txt rename to search-solr/solr/business/conf/lang/stopwords_ar.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_bg.txt b/search-solr/solr/business/conf/lang/stopwords_bg.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_bg.txt rename to search-solr/solr/business/conf/lang/stopwords_bg.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_ca.txt b/search-solr/solr/business/conf/lang/stopwords_ca.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_ca.txt rename to search-solr/solr/business/conf/lang/stopwords_ca.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_cz.txt b/search-solr/solr/business/conf/lang/stopwords_cz.txt similarity index 100% rename from 
search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_cz.txt rename to search-solr/solr/business/conf/lang/stopwords_cz.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_da.txt b/search-solr/solr/business/conf/lang/stopwords_da.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_da.txt rename to search-solr/solr/business/conf/lang/stopwords_da.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_de.txt b/search-solr/solr/business/conf/lang/stopwords_de.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_de.txt rename to search-solr/solr/business/conf/lang/stopwords_de.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_el.txt b/search-solr/solr/business/conf/lang/stopwords_el.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_el.txt rename to search-solr/solr/business/conf/lang/stopwords_el.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_en.txt b/search-solr/solr/business/conf/lang/stopwords_en.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_en.txt rename to search-solr/solr/business/conf/lang/stopwords_en.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_es.txt b/search-solr/solr/business/conf/lang/stopwords_es.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_es.txt rename to search-solr/solr/business/conf/lang/stopwords_es.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_et.txt b/search-solr/solr/business/conf/lang/stopwords_et.txt similarity index 100% rename from 
search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_et.txt rename to search-solr/solr/business/conf/lang/stopwords_et.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_eu.txt b/search-solr/solr/business/conf/lang/stopwords_eu.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_eu.txt rename to search-solr/solr/business/conf/lang/stopwords_eu.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_fa.txt b/search-solr/solr/business/conf/lang/stopwords_fa.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_fa.txt rename to search-solr/solr/business/conf/lang/stopwords_fa.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_fi.txt b/search-solr/solr/business/conf/lang/stopwords_fi.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_fi.txt rename to search-solr/solr/business/conf/lang/stopwords_fi.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_fr.txt b/search-solr/solr/business/conf/lang/stopwords_fr.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_fr.txt rename to search-solr/solr/business/conf/lang/stopwords_fr.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_ga.txt b/search-solr/solr/business/conf/lang/stopwords_ga.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_ga.txt rename to search-solr/solr/business/conf/lang/stopwords_ga.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_gl.txt b/search-solr/solr/business/conf/lang/stopwords_gl.txt similarity index 100% rename from 
search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_gl.txt rename to search-solr/solr/business/conf/lang/stopwords_gl.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_hi.txt b/search-solr/solr/business/conf/lang/stopwords_hi.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_hi.txt rename to search-solr/solr/business/conf/lang/stopwords_hi.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_hu.txt b/search-solr/solr/business/conf/lang/stopwords_hu.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_hu.txt rename to search-solr/solr/business/conf/lang/stopwords_hu.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_hy.txt b/search-solr/solr/business/conf/lang/stopwords_hy.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_hy.txt rename to search-solr/solr/business/conf/lang/stopwords_hy.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_id.txt b/search-solr/solr/business/conf/lang/stopwords_id.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_id.txt rename to search-solr/solr/business/conf/lang/stopwords_id.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_it.txt b/search-solr/solr/business/conf/lang/stopwords_it.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_it.txt rename to search-solr/solr/business/conf/lang/stopwords_it.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_ja.txt b/search-solr/solr/business/conf/lang/stopwords_ja.txt similarity index 100% rename from 
search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_ja.txt rename to search-solr/solr/business/conf/lang/stopwords_ja.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_lv.txt b/search-solr/solr/business/conf/lang/stopwords_lv.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_lv.txt rename to search-solr/solr/business/conf/lang/stopwords_lv.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_nl.txt b/search-solr/solr/business/conf/lang/stopwords_nl.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_nl.txt rename to search-solr/solr/business/conf/lang/stopwords_nl.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_no.txt b/search-solr/solr/business/conf/lang/stopwords_no.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_no.txt rename to search-solr/solr/business/conf/lang/stopwords_no.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_pt.txt b/search-solr/solr/business/conf/lang/stopwords_pt.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_pt.txt rename to search-solr/solr/business/conf/lang/stopwords_pt.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_ro.txt b/search-solr/solr/business/conf/lang/stopwords_ro.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_ro.txt rename to search-solr/solr/business/conf/lang/stopwords_ro.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_ru.txt b/search-solr/solr/business/conf/lang/stopwords_ru.txt similarity index 100% rename from 
search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_ru.txt rename to search-solr/solr/business/conf/lang/stopwords_ru.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_sv.txt b/search-solr/solr/business/conf/lang/stopwords_sv.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_sv.txt rename to search-solr/solr/business/conf/lang/stopwords_sv.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_th.txt b/search-solr/solr/business/conf/lang/stopwords_th.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_th.txt rename to search-solr/solr/business/conf/lang/stopwords_th.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_tr.txt b/search-solr/solr/business/conf/lang/stopwords_tr.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/stopwords_tr.txt rename to search-solr/solr/business/conf/lang/stopwords_tr.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/userdict_ja.txt b/search-solr/solr/business/conf/lang/userdict_ja.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/lang/userdict_ja.txt rename to search-solr/solr/business/conf/lang/userdict_ja.txt diff --git a/search-solr/bitnami/solr/server/solr/search/conf/managed-schema.xml b/search-solr/solr/business/conf/managed-schema.xml similarity index 83% rename from search-solr/bitnami/solr/server/solr/search/conf/managed-schema.xml rename to search-solr/solr/business/conf/managed-schema.xml index 65561917..48353a44 100644 --- a/search-solr/bitnami/solr/server/solr/search/conf/managed-schema.xml +++ b/search-solr/solr/business/conf/managed-schema.xml @@ -1,6 +1,6 @@ - identifier + id @@ -209,124 +209,140 @@ - + + + + 
+ + + + + + + + + - + - + - + - + - + - + - - - + - - - + - - + - + - - - - - + - + - + - + - - + - - - + - + + + + + + + + + + + + + + + + + - + - - + + - + + + + - + - + - - - + - + - + - @@ -368,18 +384,17 @@ + + - - + + - - - \ No newline at end of file diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/protwords.txt b/search-solr/solr/business/conf/protwords.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/protwords.txt rename to search-solr/solr/business/conf/protwords.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/solrconfig.xml b/search-solr/solr/business/conf/solrconfig.xml similarity index 64% rename from search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/solrconfig.xml rename to search-solr/solr/business/conf/solrconfig.xml index 6abbd0ee..95918e7a 100644 --- a/search-solr/bitnami/solr/server/solr/configsets/sample_techproducts_configs/conf/solrconfig.xml +++ b/search-solr/solr/business/conf/solrconfig.xml @@ -18,7 +18,7 @@ - 9.0 + 9.3 - - - - - - - - - + + Lucene will flush based on whichever limit is hit first. --> @@ -229,18 +220,18 @@ - - - - - - + + + + + @@ -250,11 +241,10 @@ To aid in advanced debugging, Lucene provides an "InfoStream" of detailed information when indexing. - Setting the value to true will instruct the underlying Lucene - IndexWriter to write its info stream to solr's log. By default, - this is enabled here, and controlled through log4j2.xml + Setting The value to true will instruct the underlying Lucene + IndexWriter to write its debugging info the specified file --> - true + @@ -284,7 +274,7 @@ Instead of enabling autoCommit, consider using "commitWithin" when adding documents. 
- https://solr.apache.org/guide/indexing-with-update-handlers.html + https://solr.apache.org/guide/solr/latest/indexing-guide/indexing-with-update-handlers.html maxDocs - Maximum number of documents to add since the last commit before automatically triggering a new commit. @@ -299,10 +289,10 @@ If the updateLog is enabled, then it's highly recommended to have some sort of hard autoCommit to limit the log size. --> - - ${solr.autoCommit.maxTime:15000} - false - + + ${solr.autoCommit.maxTime:120000} + false + - - ${solr.autoSoftCommit.maxTime:-1} - + + ${solr.autoSoftCommit.maxTime:-1} + ${solr.max.booleanClauses:1024} - - - -1 - - @@ -392,10 +370,12 @@ unordered sets of *all* documents that match a query. When a new searcher is opened, its caches may be prepopulated or "autowarmed" using data from caches in the old searcher. - autowarmCount is the number of items to prepopulate. + autowarmCount is the number of items to prepopulate. For + CaffeineCache, the autowarmed items will be the most recently + accessed items. Parameters: - class - the SolrCache implementation + class - the SolrCache implementation (CaffeineCache by default) size - the maximum number of entries in the cache initialSize - the initial capacity (number of entries) of the cache. (see java.util.HashMap) @@ -411,15 +391,15 @@ + Caches results of searches - ordered lists of document ids + (DocList) based on a query, a sort, and the range of documents requested. 
+ Additional supported parameter by CaffeineCache: + maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed + to occupy + --> + initialSize="512" + autowarmCount="0"/> + class="solr.CaffeineCache" + size="10" + initialSize="0" + autowarmCount="10" + regenerator="solr.NoOpRegenerator" /> - - - true - - + For most situations, this will not be useful unless you + frequently get the same search repeatedly with different sort + options, and none of them ever use "score" + --> + - - 20 + An optimization for use with the queryResultCache. When a search + is requested, a superset of the requested number of document ids + are collected. For example, if a search for a particular query + requests matching documents 10 through 19, and queryWindowSize is 50, + then documents 0 through 49 will be collected and cached. Any further + requests in that range can be satisfied via the cache. + --> + 20 - - 200 + + 200 - + - firstSearcher - fired whenever a new searcher is being - prepared but there is no current registered searcher to handle - requests or to gain autowarming data from. + + newSearcher - fired whenever a new searcher is being prepared + and there is a current searcher handling requests (aka + registered). It can be used to prime certain caches to + prevent long request times for certain requests. + + firstSearcher - fired whenever a new searcher is being + prepared but there is no current registered searcher to handle + requests or to gain autowarming data from. 
+ + + --> @@ -560,9 +540,11 @@ + @@ -581,30 +563,27 @@ Circuit Breaker Section - This section consists of configurations for circuit breakers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> - - - + - - + + + + + - + --> + + + + _text_ + + + - - explicit - 10 - - - - - - + + explicit + 10 + + - - explicit - json - true - text - - - - - text + explicit + json + true - - - - + + + startup + commit + optimize + managed-schema.xml,_schema_analysis_synonyms_ADDRESS.json,_schema_analysis_synonyms_NAME.json + + 1 + - https://solr.apache.org/guide/indexing-with-tika.html + + + + name + name_suggest + BlendedInfixLookupFactory + position_linear + weight + text_stemmed + false + false + false + + - --> - + - true - ignored_ - - - true - links - ignored_ + json + true + true + name + 10 + + suggest + - - - 5 - - - text_general @@ -835,7 +811,7 @@ default - text + _text_ solr.DirectSolrSpellChecker internal @@ -857,6 +833,7 @@ + - - - - - - - + --> @@ -926,7 +865,6 @@ collations (re-written queries) can include a combination of corrections from both spellcheckers --> default - wordbreak on true 10 @@ -942,176 +880,13 @@ - - - - mySuggester - FuzzyLookupFactory - DocumentDictionaryFactory - cat - price - string - false - - - - - - true - 10 - - - suggest - - - - - - - - - - - true - - - tvComponent - - - - - - - - - lingo3g - true - Lingo3G - name, features - true - English - - - - lingo - Lingo - name, features - true - English - - - - stc - STC - name, features - true - English - - - - kmeans - Bisecting K-Means - name, features - true - English - - - - - - - - true - - edismax - - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 - - *:* - 100 - *,score - - - - - clustering - - - - - - - - string - elevate.xml - - - - - - explicit - - - elevator - - - + via parameters. The below configuration supports hl.method=original and fastVector. --> @@ -1214,78 +989,111 @@ via parameters. 
The below configuration supports hl.method=original and fastVec - - + Field type guessing update request processors that will + attempt to parse string-typed field values as Booleans, Longs, + Doubles, or Dates, and then add schema fields with the guessed + field types Text content will be indexed as "text_general" as + well as a copy to a plain string version in *_str. + See the updateRequestProcessorChain defined later for the order they are executed in. - - + + + + [^\w-\.] + _ + + + + + + + yyyy-MM-dd['T'[HH:mm[:ss[.SSS]][z + yyyy-MM-dd['T'[HH:mm[:ss[,SSS]][z + yyyy-MM-dd HH:mm[:ss[.SSS]][z + yyyy-MM-dd HH:mm[:ss[,SSS]][z + [EEE, ]dd MMM yyyy HH:mm[:ss] z + EEEE, dd-MMM-yy HH:mm:ss z + EEE MMM ppd HH:mm:ss [z ]yyyy + + + + + java.lang.String + text_general + + *_str + 256 + + + true + + + java.lang.Boolean + booleans + + + java.util.Date + pdates + + + java.lang.Long + java.lang.Integer + plongs + + + java.lang.Number + pdoubles + + + + + + + + + - - + + + + true + id + id + solr.processor.Lookup3Signature + - --> + - - - 5 - + + - - - - - - QUERY_DOC_FV - diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/stopwords.txt b/search-solr/solr/business/conf/stopwords.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/stopwords.txt rename to search-solr/solr/business/conf/stopwords.txt diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/synonyms.txt b/search-solr/solr/business/conf/synonyms.txt similarity index 100% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/synonyms.txt rename to search-solr/solr/business/conf/synonyms.txt diff --git a/search-solr/bitnami/solr/server/solr/search/conf/unused-field-types/unused-language-fieldtypes.xml b/search-solr/solr/business/conf/unused-field-types/unused-language-fieldtypes.xml similarity index 100% rename from 
search-solr/bitnami/solr/server/solr/search/conf/unused-field-types/unused-language-fieldtypes.xml rename to search-solr/solr/business/conf/unused-field-types/unused-language-fieldtypes.xml diff --git a/search-solr/bitnami/solr/server/solr/search/core.properties b/search-solr/solr/business/core.properties similarity index 84% rename from search-solr/bitnami/solr/server/solr/search/core.properties rename to search-solr/solr/business/core.properties index 5d768b36..48589c65 100644 --- a/search-solr/bitnami/solr/server/solr/search/core.properties +++ b/search-solr/solr/business/core.properties @@ -1,4 +1,4 @@ #Written by CorePropertiesLocator #Thu Jun 16 22:52:39 UTC 2022 dataDir=data -name=search +name=business diff --git a/search-solr/solr/business/solr.in.sh b/search-solr/solr/business/solr.in.sh new file mode 100644 index 00000000..47af868e --- /dev/null +++ b/search-solr/solr/business/solr.in.sh @@ -0,0 +1,287 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Settings here will override settings in existing env vars or in bin/solr. The default shipped state +# of this file is completely commented. 
+ +# By default the script will use JAVA_HOME to determine which java +# to use, but you can set a specific path for Solr to use without +# affecting other Java applications on your server/workstation. +#SOLR_JAVA_HOME="" + +# This controls the number of seconds that the solr script will wait for +# Solr to stop gracefully. If the graceful stop fails, the script will +# forcibly stop Solr. +#SOLR_STOP_WAIT="180" + +# This controls the number of seconds that the solr script will wait for +# Solr to start. If the start fails, the script will give up waiting and +# display the last few lines of the logfile. +#SOLR_START_WAIT="$SOLR_STOP_WAIT" + +# Increase Java Heap as needed to support your indexing / query needs +#SOLR_HEAP="512m" + +# Expert: If you want finer control over memory options, specify them directly +# Comment out SOLR_HEAP if you are using this though, that takes precedence +#SOLR_JAVA_MEM="-Xms512m -Xmx512m" + +# Enable verbose GC logging... +# * If this is unset, various default options will be selected depending on which JVM version is in use +# * For Java 8: if this is set, additional params will be added to specify the log file & rotation +# * For Java 9 or higher: each included opt param that starts with '-Xlog:gc', but does not include an +# output specifier, will have a 'file' output specifier (as well as formatting & rollover options) +# appended, using the effective value of the SOLR_LOGS_DIR. 
+# +#GC_LOG_OPTS='-Xlog:gc*' # (Java 9+) +#GC_LOG_OPTS="-verbose:gc -XX:+PrintHeapAtGC -XX:+PrintGCDetails \ +# -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+PrintTenuringDistribution -XX:+PrintGCApplicationStoppedTime" + +# These GC settings have shown to work well for a number of common Solr workloads +#GC_TUNE=" \ +#-XX:+ExplicitGCInvokesConcurrent \ +#-XX:SurvivorRatio=4 \ +#-XX:TargetSurvivorRatio=90 \ +#-XX:MaxTenuringThreshold=8 \ +#-XX:+UseConcMarkSweepGC \ +#-XX:ConcGCThreads=4 -XX:ParallelGCThreads=4 \ +#-XX:+CMSScavengeBeforeRemark \ +#-XX:PretenureSizeThreshold=64m \ +#-XX:+UseCMSInitiatingOccupancyOnly \ +#-XX:CMSInitiatingOccupancyFraction=50 \ +#-XX:CMSMaxAbortablePrecleanTime=6000 \ +#-XX:+CMSParallelRemarkEnabled \ +#-XX:+ParallelRefProcEnabled etc. + +# Set the ZooKeeper connection string if using an external ZooKeeper ensemble +# e.g. host1:2181,host2:2181/chroot +# Leave empty if not using SolrCloud +#ZK_HOST="" + +# Set to true if your ZK host has a chroot path, and you want to create it automatically. 
+#ZK_CREATE_CHROOT=true + +# Set the ZooKeeper client timeout (for SolrCloud mode) +#ZK_CLIENT_TIMEOUT="30000" + +# By default the start script uses "localhost"; override the hostname here +# for production SolrCloud environments to control the hostname exposed to cluster state +#SOLR_HOST="192.168.1.1" + +# By default Solr will try to connect to Zookeeper with 30 seconds in timeout; override the timeout if needed +#SOLR_WAIT_FOR_ZK="30" + +# By default the start script uses UTC; override the timezone if needed +#SOLR_TIMEZONE="UTC" + +# Set to true to activate the JMX RMI connector to allow remote JMX client applications +# to monitor the JVM hosting Solr; set to "false" to disable that behavior +# (false is recommended in production environments) +#ENABLE_REMOTE_JMX_OPTS="false" + +# The script will use SOLR_PORT+10000 for the RMI_PORT or you can set it here +# RMI_PORT=18983 + +# Anything you add to the SOLR_OPTS variable will be included in the java +# start command line as-is, in ADDITION to other options. If you specify the +# -a option on start script, those options will be appended as well. Examples: +#SOLR_OPTS="$SOLR_OPTS -Dsolr.autoSoftCommit.maxTime=3000" +#SOLR_OPTS="$SOLR_OPTS -Dsolr.autoCommit.maxTime=60000" +#SOLR_OPTS="$SOLR_OPTS -Dsolr.clustering.enabled=true" + +# Location where the bin/solr script will save PID files for running instances +# If not set, the script will create PID files in $SOLR_TIP/bin +#SOLR_PID_DIR= + +# Path to a directory for Solr to store cores and their data. By default, Solr will use server/solr +# If solr.xml is not stored in ZooKeeper, this directory needs to contain solr.xml +#SOLR_HOME= + +# Path to a directory that Solr will use as root for data folders for each core. +# If not set, defaults to /data. 
Overridable per core through 'dataDir' core property +#SOLR_DATA_HOME= + +# Solr provides a default Log4J configuration xml file in server/resources +# however, you may want to customize the log settings and file appender location +# so you can point the script to use a different log4j2.xml file +#LOG4J_PROPS=/var/solr/log4j2.xml + +# Changes the logging level. Valid values: ALL, TRACE, DEBUG, INFO, WARN, ERROR, FATAL, OFF. Default is INFO +# This is an alternative to changing the rootLogger in log4j2.xml +#SOLR_LOG_LEVEL=INFO + +# Location where Solr should write logs to. Absolute or relative to solr start dir +#SOLR_LOGS_DIR=logs + +# Enables jetty request log for all requests +#SOLR_REQUESTLOG_ENABLED=true + +# Sets the port Solr binds to, default is 8983 +#SOLR_PORT=8983 + +# Restrict access to solr by IP address. +# Specify a comma-separated list of addresses or networks, for example: +# 127.0.0.1, 192.168.0.0/24, [::1], [2000:123:4:5::]/64 +#SOLR_IP_ALLOWLIST= + +# Block access to solr from specific IP addresses. +# Specify a comma-separated list of addresses or networks, for example: +# 127.0.0.1, 192.168.0.0/24, [::1], [2000:123:4:5::]/64 +#SOLR_IP_DENYLIST= + +# Sets the network interface the Solr binds to. To prevent administrators from +# accidentally exposing Solr more widely than intended, this defaults to 127.0.0.1. +# Administrators should think carefully about their deployment environment and +# set this value as narrowly as required before going to production. In +# environments where security is not a concern, 0.0.0.0 can be used to allow +# Solr to accept connections on all network interfaces. +#SOLR_JETTY_HOST="127.0.0.1" + +# Enables HTTPS. It is implicitly true if you set SOLR_SSL_KEY_STORE. Use this config +# to enable https module with custom jetty configuration.
+#SOLR_SSL_ENABLED=true +# Uncomment to set SSL-related system properties +# Be sure to update the paths to the correct keystore for your environment +#SOLR_SSL_KEY_STORE=etc/solr-ssl.keystore.p12 +#SOLR_SSL_KEY_STORE_PASSWORD=secret +#SOLR_SSL_TRUST_STORE=etc/solr-ssl.keystore.p12 +#SOLR_SSL_TRUST_STORE_PASSWORD=secret +# Require clients to authenticate +#SOLR_SSL_NEED_CLIENT_AUTH=false +# Enable clients to authenticate (but not require) +#SOLR_SSL_WANT_CLIENT_AUTH=false +# Verify client's hostname during SSL handshake +#SOLR_SSL_CLIENT_HOSTNAME_VERIFICATION=false +# SSL Certificates contain host/ip "peer name" information that is validated by default. Setting +# this to false can be useful to disable these checks when re-using a certificate on many hosts +#SOLR_SSL_CHECK_PEER_NAME=true +# Override Key/Trust Store types if necessary +#SOLR_SSL_KEY_STORE_TYPE=PKCS12 +#SOLR_SSL_TRUST_STORE_TYPE=PKCS12 + +# Uncomment if you want to override previously defined SSL values for HTTP client +# otherwise keep them commented and the above values will automatically be set for HTTP clients +#SOLR_SSL_CLIENT_KEY_STORE= +#SOLR_SSL_CLIENT_KEY_STORE_PASSWORD= +#SOLR_SSL_CLIENT_TRUST_STORE= +#SOLR_SSL_CLIENT_TRUST_STORE_PASSWORD= +#SOLR_SSL_CLIENT_KEY_STORE_TYPE= +#SOLR_SSL_CLIENT_TRUST_STORE_TYPE= + +# Sets path of Hadoop credential provider (hadoop.security.credential.provider.path property) and +# enables usage of credential store. 
+# Credential provider should store the following keys: +# * solr.jetty.keystore.password +# * solr.jetty.truststore.password +# Set the two below if you want to set specific store passwords for HTTP client +# * javax.net.ssl.keyStorePassword +# * javax.net.ssl.trustStorePassword +# More info: https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html +#SOLR_HADOOP_CREDENTIAL_PROVIDER_PATH=localjceks://file/home/solr/hadoop-credential-provider.jceks +#SOLR_OPTS=" -Dsolr.ssl.credential.provider.chain=hadoop" + +# Settings for authentication +# Please configure only one of SOLR_AUTHENTICATION_CLIENT_BUILDER or SOLR_AUTH_TYPE parameters +#SOLR_AUTHENTICATION_CLIENT_BUILDER="org.apache.solr.client.solrj.impl.PreemptiveBasicAuthClientBuilderFactory" +#SOLR_AUTH_TYPE="basic" +#SOLR_AUTHENTICATION_OPTS="-Dbasicauth=solr:SolrRocks" + +# Settings for ZK ACL +#SOLR_ZK_CREDS_AND_ACLS="-DzkACLProvider=org.apache.solr.common.cloud.DigestZkACLProvider \ +# -DzkCredentialsProvider=org.apache.solr.common.cloud.DigestZkCredentialsProvider \ +# -DzkCredentialsInjector=org.apache.solr.common.cloud.VMParamsZkCredentialsInjector \ +# -DzkDigestUsername=admin-user -DzkDigestPassword=CHANGEME-ADMIN-PASSWORD \ +# -DzkDigestReadonlyUsername=readonly-user -DzkDigestReadonlyPassword=CHANGEME-READONLY-PASSWORD" +#SOLR_OPTS="$SOLR_OPTS $SOLR_ZK_CREDS_AND_ACLS" + +# optionally, you can use a Java properties file 'zkDigestCredentialsFile' +#... +# -DzkDigestCredentialsFile=/path/to/zkDigestCredentialsFile.properties +#... + +# Use a custom injector to inject ZK credentials into DigestZkACLProvider +# -DzkCredentialsInjector expects a class implementing org.apache.solr.common.cloud.ZkCredentialsInjector +# ... +# -DzkCredentialsInjector=fully.qualified.class.CustomInjectorClassName" +# ...
+ +# Jetty GZIP module enabled by default +#SOLR_GZIP_ENABLED=true + +# Settings for common system values that may cause operational impairment when system defaults are used. +# Solr can use many processes and many file handles. On modern operating systems the savings by leaving +# these settings low is minuscule, while the consequence can be Solr instability. To turn these checks off, set +# SOLR_ULIMIT_CHECKS=false either here or as part of your profile. + +# Different limits can be set in solr.in.sh or your profile if you prefer as well. +#SOLR_RECOMMENDED_OPEN_FILES= +#SOLR_RECOMMENDED_MAX_PROCESSES= +#SOLR_ULIMIT_CHECKS= + +# When running Solr in non-cloud mode and if planning to do distributed search (using the "shards" parameter), the +# list of hosts needs to be defined in an allow-list or Solr will forbid the request. The allow-list can be configured +# in solr.xml, or if you are using the OOTB solr.xml, can be specified using the system property "solr.allowUrls". +# Alternatively host checking can be disabled by using the system property "solr.disable.allowUrls" +#SOLR_OPTS="$SOLR_OPTS -Dsolr.allowUrls=http://localhost:8983,http://localhost:8984" + +# For a visual indication in the Admin UI of what type of environment this cluster is, configure +# a -Dsolr.environment property below. Valid values are prod, stage, test, dev, with an optional +# label or color, e.g. -Dsolr.environment=test,label=Functional+test,color=brown +# SOLR_OPTS="$SOLR_OPTS -Dsolr.environment=prod" + +# Specifies the path to a common library directory that will be shared across all cores. +# Any JAR files in this directory will be added to the search path for Solr plugins. +# If the specified path is not absolute, it will be relative to `$SOLR_HOME`. +#SOLR_OPTS="$SOLR_OPTS -Dsolr.sharedLib=/path/to/lib" + +# Runs solr in java security manager sandbox. This can protect against some attacks.
+# Runtime properties are passed to the security policy file (server/etc/security.policy) +# You can also tweak via standard JDK files such as ~/.java.policy, see https://s.apache.org/java8policy +# This is experimental! It may not work at all with Hadoop/HDFS features. +#SOLR_SECURITY_MANAGER_ENABLED=true +# This variable provides you with the option to disable the Admin UI if you uncomment the variable below and +# change the value to true. The option is configured as a system property as defined in SOLR_START_OPTS in the start +# scripts. +# SOLR_ADMIN_UI_DISABLED=false + +# Solr is by default allowed to read and write data from/to SOLR_HOME and a few other well defined locations +# Sometimes it may be necessary to place a core or a backup on a different location or a different disk +# This parameter lets you specify file system path(s) to explicitly allow. The special value of '*' will allow any path +#SOLR_OPTS="$SOLR_OPTS -Dsolr.allowPaths=/mnt/bigdisk,/other/path" + +# Solr can attempt to take a heap dump on out of memory errors. To enable this, uncomment the line setting +# SOLR_HEAP_DUMP below. Heap dumps will be saved to SOLR_LOGS_DIR/dumps by default. Alternatively, you can specify any +# other directory, which will implicitly enable heap dumping. Dump name pattern will be solr-[timestamp]-pid[###].hprof +# When using this feature, it is recommended to have an external service monitoring the given dir. +# If more fine grained control is required, you can manually add the appropriate flags to SOLR_OPTS +# See https://docs.oracle.com/en/java/javase/11/troubleshoot/command-line-options1.html +# You can test this behaviour by setting SOLR_HEAP=25m +#SOLR_HEAP_DUMP=true +#SOLR_HEAP_DUMP_DIR=/var/log/dumps + +# Before version 9.0, Solr required a copy of solr.xml file in $SOLR_HOME. Now Solr will use a default file if not found.
+# To restore the old behaviour, set the variable below to true +#SOLR_SOLRXML_REQUIRED=false + +# Some previous versions of Solr use an outdated log4j dependency. If you are unable to use at least log4j version 2.15.0 +# then enable the following setting to address CVE-2021-44228 +# SOLR_OPTS="$SOLR_OPTS -Dlog4j2.formatMsgNoLookups=true" + +# The bundled plugins in the "modules" folder can easily be enabled as a comma-separated list in SOLR_MODULES variable +# SOLR_MODULES=extraction,ltr + +# Configure the default replica placement plugin to use if one is not configured in cluster properties +# See https://solr.apache.org/guide/solr/latest/configuration-guide/replica-placement-plugins.html for details +#SOLR_PLACEMENTPLUGIN_DEFAULT=simple diff --git a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/solrconfig.xml b/search-solr/solr/business_follower/conf/solrconfig.xml similarity index 93% rename from search-solr/bitnami/solr/server/solr/configsets/_default/conf/solrconfig.xml rename to search-solr/solr/business_follower/conf/solrconfig.xml index 528b65a7..64b1d4f0 100644 --- a/search-solr/bitnami/solr/server/solr/configsets/_default/conf/solrconfig.xml +++ b/search-solr/solr/business_follower/conf/solrconfig.xml @@ -18,7 +18,7 @@ - 9.0 + 9.3 @@ -274,7 +274,7 @@ Instead of enabling autoCommit, consider using "commitWithin" when adding documents. - https://solr.apache.org/guide/indexing-with-update-handlers.html + https://solr.apache.org/guide/solr/latest/indexing-guide/indexing-with-update-handlers.html maxDocs - Maximum number of documents to add since the last commit before automatically triggering a new commit. 
@@ -428,7 +428,7 @@ + + + + _text_ + + + @@ -747,19 +754,50 @@ - - + + + + + ${solr.leaderUrl:http://172.19.0.2:8983/solr/business} + 00:00:30 + internal + + + + + + + name + name_suggest + BlendedInfixLookupFactory + position_linear + weight + text_stemmed + false + false + false + + + + - _text_ + json + true + true + name + 10 - + + suggest + + @@ -816,7 +854,7 @@ IN OTHER WORDS, THERE IS REALLY GOOD CHANCE THE SETUP BELOW IS NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM! - See https://solr.apache.org/guide/spell-checking.html for details + See https://solr.apache.org/guide/solr/latest/query-guide/spell-checking.html for details on the request parameters. --> @@ -843,7 +881,7 @@ @@ -1023,7 +1061,7 @@ - @@ -1039,22 +1077,22 @@ uniqueness based on that anyway. --> - + + + + true + id + id + solr.processor.Lookup3Signature + + + + + @@ -31,8 +31,9 @@ ${solr.max.booleanClauses:1024} ${solr.sharedLib:} ${solr.modules:} - ${solr.allowPaths:} - ${solr.allowUrls:} + ${solr.allowPaths:/replication} + + ${solr.allowUrls:172.19.0.2:8983/solr/business} @@ -47,6 +48,7 @@ 300000 ${zkCredentialsProvider:org.apache.solr.common.cloud.DefaultZkCredentialsProvider} ${zkACLProvider:org.apache.solr.common.cloud.DefaultZkACLProvider} + ${zkCredentialsInjector:org.apache.solr.common.cloud.DefaultZkCredentialsInjector} ${distributedClusterStateUpdates:false} ${distributedCollectionConfigSetExecution:false} @@ -62,4 +64,4 @@ - + \ No newline at end of file