mirror of https://github.com/interlegis/sapl.git
				
				
			
				 19 changed files with 2773 additions and 70 deletions
			
			
		| @ -0,0 +1,167 @@ | |||||
|  | 
 | ||||
|  | import requests | ||||
|  | import subprocess | ||||
|  | import sys | ||||
|  | 
 | ||||
|  | 
 | ||||
class SolrClient:
    """Small HTTP client for the Solr admin endpoints used to set up a collection.

    Every method issues its requests through the ``requests`` library against
    the server identified by ``protocol``, ``address`` and ``port`` and
    reports its outcome on standard output.
    """

    # URL templates. Unless noted otherwise the positional fields are
    # protocol, address and port, followed by endpoint-specific values.
    LIST_CONFIGSETS = "{}://{}:{}/solr/admin/configs?action=LIST&omitHeader=true&wt=json"
    UPLOAD_CONFIGSET = "{}://{}:{}/solr/admin/configs?action=UPLOAD&name={}&wt=json"
    LIST_COLLECTIONS = "{}://{}:{}/solr/admin/collections?action=LIST&wt=json"
    STATUS_COLLECTION = "{}://{}:{}/solr/admin/collections?action=CLUSTERSTATUS&collection={}&wt=json"
    # STATUS_CORE is formatted with a replica base URL and a core name instead.
    STATUS_CORE = "{}/admin/cores?action=STATUS&name={}"
    EXISTS_COLLECTION = "{}://{}:{}/solr/{}/admin/ping?wt=json"
    OPTIMIZE_COLLECTION = "{}://{}:{}/solr/{}/update?optimize=true&wt=json"
    CREATE_COLLECTION = ("{}://{}:{}/solr/admin/collections?action=CREATE"
                         "&name={}&collection.configName={}&numShards={}&replicationFactor={}&wt=json")
    DELETE_COLLECTION = "{}://{}:{}/solr/admin/collections?action=DELETE&name={}&wt=json"
    DELETE_DATA = "{}://{}:{}/solr/{}/update?commitWithin=1000&overwrite=true&wt=json"

    # Values used by create_collection() and maybe_upload_configset().
    CONFIGSET_NAME = "sapl_configset"
    NUM_SHARDS = 1
    NUM_REPLICAS = 1

    def __init__(self, address='localhost', port=8983, protocol='http'):
        """Store the connection parameters; no request is made here."""
        self.protocol = protocol
        self.address = address
        self.port = port

    def status_collection(self, collection_name):
        """Return ``(num_docs, deleted_docs)`` summed over the collection's shards.

        The cluster status is requested for ``collection_name``; then, for
        each shard, the first listed replica is asked for its core status and
        its ``numDocs`` / ``deletedDocs`` index counters are accumulated.

        Raises:
            KeyError: if the cluster status response carries no shard
                information for ``collection_name`` (for instance because the
                collection does not exist), or a core status response lacks
                the expected keys.
        """
        col_url = self.STATUS_COLLECTION.format(self.protocol, self.address, self.port, collection_name)
        status_cluster = requests.get(col_url).json()
        try:
            shards = status_cluster['cluster']['collections'][collection_name]['shards']
        except KeyError:
            # Same exception type as before, but with an actionable message.
            raise KeyError("Cluster status has no shard information for collection '%s'"
                           % collection_name) from None

        num_docs = 0
        deleted_docs = 0
        for shard in shards.values():
            # Only a single replica per shard is queried.
            replica = next(iter(shard['replicas'].values()), None)
            if replica is None:
                continue
            replica_core = replica['core']
            req_url = self.STATUS_CORE.format(replica['base_url'], replica_core)
            index_info = requests.get(req_url).json()['status'][replica_core]['index']
            num_docs += index_info['numDocs']
            deleted_docs += index_info['deletedDocs']
        return num_docs, deleted_docs

    def list_collections(self):
        """Return the ``collections`` entry of the server's LIST response."""
        req_url = self.LIST_COLLECTIONS.format(self.protocol, self.address, self.port)
        return requests.get(req_url).json()['collections']

    def exists_collection(self, collection_name):
        """Return True when ``collection_name`` is among the listed collections."""
        return collection_name in self.list_collections()

    def maybe_upload_configset(self, force=False):
        """Upload the configset zip unless the server already lists it.

        Args:
            force: when true, upload even if ``CONFIGSET_NAME`` is already
                present in the server's configset list.

        The zip is read from ``./solr/sapl_configset/conf/saplconfigset.zip``,
        a path relative to the current working directory.
        """
        req_url = self.LIST_CONFIGSETS.format(self.protocol,
                                              self.address,
                                              self.port)
        configsets = requests.get(req_url).json()['configSets']

        if self.CONFIGSET_NAME not in configsets or force:
            req_url = self.UPLOAD_CONFIGSET.format(self.protocol, self.address, self.port, self.CONFIGSET_NAME)
            # The context manager guarantees the zip handle is closed once the
            # upload request has completed (or failed).
            with open('./solr/sapl_configset/conf/saplconfigset.zip', 'rb') as zip_file:
                files = {'file': ('saplconfigset.zip',
                                  zip_file,
                                  'application/octet-stream',
                                  {'Expires': '0'})}
                resp = requests.post(req_url, files=files)
            print(resp.content)
        else:
            print('O %s já presente no servidor, NÃO enviando.' % self.CONFIGSET_NAME)

    def create_collection(self, collection_name):
        """Create ``collection_name`` using the class-level configset settings.

        The configset is uploaded first when the server does not list it.

        Returns:
            True when the server answered with a success status, False
            otherwise (the error is printed).
        """
        self.maybe_upload_configset()
        req_url = self.CREATE_COLLECTION.format(self.protocol,
                                                self.address,
                                                self.port,
                                                collection_name,
                                                self.CONFIGSET_NAME,
                                                self.NUM_SHARDS,
                                                self.NUM_REPLICAS)
        print(req_url)
        res = requests.post(req_url)
        if res.ok:
            print("Collection '%s' created succesfully" % collection_name)
            return True

        print("Error creating collection '%s'" % collection_name)
        try:
            error_msg = res.json()['error']['msg']
        except (ValueError, KeyError, TypeError):
            # The error body is not the expected JSON document; show it raw
            # instead of failing while reporting the failure.
            error_msg = res.text
        print("Error %s: %s" % (res.status_code, error_msg))
        return False

    # TODO: redo to collections
    # def optimize_collection(self, collection_name):
    #     req_url = self.OPTIMIZE_COLLECTION.format(self.protocol, self.address, self.port, collection_name)
    #     res = requests.get(req_url)
    #     if not res.ok:
    #         print("Error optimizing collection '{}'".format(collection_name))
    #         print("Code {}: {}".format(res.status_code, res.text))
    #     else:
    #         print("Collection '{}' optimized successfully!".format(collection_name))

    def delete_collection(self, collection_name):
        """Ask the server to delete ``collection_name`` and print the outcome."""
        req_url = self.DELETE_COLLECTION.format(self.protocol, self.address, self.port, collection_name)
        res = requests.post(req_url)
        if not res.ok:
            # '%' (not ',') so the collection name is interpolated.
            print("Error deleting collection '%s'" % collection_name)
            print("Code {}: {}".format(res.status_code, res.text))
        else:
            print("Collection '%s' deleted successfully!" % collection_name)

    def delete_index_data(self, collection_name):
        """Delete every document of ``collection_name`` and print the new counters.

        Posts a ``*:*`` delete query to the collection's update handler; on
        success the document counters from status_collection() are printed.
        """
        req_url = self.DELETE_DATA.format(self.protocol, self.address, self.port, collection_name)
        res = requests.post(req_url,
                            data='<delete><query>*:*</query></delete>',
                            headers={'Content-Type': 'application/xml'})
        if not res.ok:
            # '%' (not ',') so the collection name is interpolated.
            print("Error deleting index for collection '%s'" % collection_name)
            print("Code {}: {}".format(res.status_code, res.text))
        else:
            print("Collection '%s' data deleted successfully!" % collection_name)

            indexed, deleted = self.status_collection(collection_name)
            print("Num docs: %s" % indexed)
            print("Delete docs: %s" % deleted)
|  | 
 | ||||
|  | 
 | ||||
if __name__ == '__main__':
    # Command line: <collection name> [address]. The collection is created
    # when the server does not list it, and a full reindex is launched when
    # the collection holds no documents.
    args = sys.argv
    if len(args) < 2:
        print("Usage: python3 docker_solr_init.py <collection name> <address>")
        sys.exit(-1)
    collection = args[1]

    # Honour the address whenever it is supplied, even if extra arguments
    # follow it; otherwise fall back to the client's defaults.
    if len(args) >= 3:
        client = SolrClient(address=args[2])
    else:
        client = SolrClient()

    if not client.exists_collection(collection):
        print("Collection '%s' doesn't exists. Creating a new one..." % collection)
        created = client.create_collection(collection)
        if not created:
            sys.exit(-1)
    else:
        print("Collection '%s' exists. Updating indexes..." % collection)

    # One status request is enough; only the live document count is needed.
    indexed, _deleted = client.status_collection(collection)
    if indexed == 0:
        print("Performing a full reindex of '%s' collection..." % collection)
        return_code = subprocess.call(["python3", "manage.py", "rebuild_index", "--noinput"])
        if return_code != 0:
            # Surface the failure instead of discarding the exit code; the
            # script's own exit status is left unchanged.
            print("Reindex of '%s' collection failed with exit code %s" % (collection, return_code))
| @ -0,0 +1,54 @@ | |||||
|  | # Licensed to the Apache Software Foundation (ASF) under one or more | ||||
|  | # contributor license agreements.  See the NOTICE file distributed with | ||||
|  | # this work for additional information regarding copyright ownership. | ||||
|  | # The ASF licenses this file to You under the Apache License, Version 2.0 | ||||
|  | # (the "License"); you may not use this file except in compliance with | ||||
|  | # the License.  You may obtain a copy of the License at | ||||
|  | # | ||||
|  | #     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | # | ||||
|  | # Unless required by applicable law or agreed to in writing, software | ||||
|  | # distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  | # See the License for the specific language governing permissions and | ||||
|  | # limitations under the License. | ||||
|  | 
 | ||||
|  | # a couple of test stopwords to test that the words are really being | ||||
|  | # configured from this file: | ||||
|  | stopworda | ||||
|  | stopwordb | ||||
|  | 
 | ||||
|  | # Standard english stop words taken from Lucene's StopAnalyzer | ||||
|  | a | ||||
|  | an | ||||
|  | and | ||||
|  | are | ||||
|  | as | ||||
|  | at | ||||
|  | be | ||||
|  | but | ||||
|  | by | ||||
|  | for | ||||
|  | if | ||||
|  | in | ||||
|  | into | ||||
|  | is | ||||
|  | it | ||||
|  | no | ||||
|  | not | ||||
|  | of | ||||
|  | on | ||||
|  | or | ||||
|  | such | ||||
|  | that | ||||
|  | the | ||||
|  | their | ||||
|  | then | ||||
|  | there | ||||
|  | these | ||||
|  | they | ||||
|  | this | ||||
|  | to | ||||
|  | was | ||||
|  | will | ||||
|  | with | ||||
| @ -0,0 +1,253 @@ | |||||
|  |  | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt | ||||
|  |  | This file is distributed under the BSD License. | ||||
|  |  | See http://snowball.tartarus.org/license.php | ||||
|  |  | Also see http://www.opensource.org/licenses/bsd-license.html | ||||
|  |  |  - Encoding was converted to UTF-8. | ||||
|  |  |  - This notice was added. | ||||
|  |  | | ||||
|  |  | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | ||||
|  | 
 | ||||
|  |  | A Portuguese stop word list. Comments begin with vertical bar. Each stop | ||||
|  |  | word is at the start of a line. | ||||
|  | 
 | ||||
|  | 
 | ||||
|  |  | The following is a ranked list (commonest to rarest) of stopwords | ||||
|  |  | deriving from a large sample of text. | ||||
|  | 
 | ||||
|  |  | Extra words have been added at the end. | ||||
|  | 
 | ||||
|  | de             |  of, from | ||||
|  | a              |  the; to, at; her | ||||
|  | o              |  the; him | ||||
|  | que            |  who, that | ||||
|  | e              |  and | ||||
|  | do             |  de + o | ||||
|  | da             |  de + a | ||||
|  | em             |  in | ||||
|  | um             |  a | ||||
|  | para           |  for | ||||
|  |   | é          from SER | ||||
|  | com            |  with | ||||
|  | não            |  not, no | ||||
|  | uma            |  a | ||||
|  | os             |  the; them | ||||
|  | no             |  em + o | ||||
|  | se             |  himself etc | ||||
|  | na             |  em + a | ||||
|  | por            |  for | ||||
|  | mais           |  more | ||||
|  | as             |  the; them | ||||
|  | dos            |  de + os | ||||
|  | como           |  as, like | ||||
|  | mas            |  but | ||||
|  |   | foi        from SER | ||||
|  | ao             |  a + o | ||||
|  | ele            |  he | ||||
|  | das            |  de + as | ||||
|  |   | tem        from TER | ||||
|  | à              |  a + a | ||||
|  | seu            |  his | ||||
|  | sua            |  her | ||||
|  | ou             |  or | ||||
|  |   | ser        from SER | ||||
|  | quando         |  when | ||||
|  | muito          |  much | ||||
|  |   | há         from HAV | ||||
|  | nos            |  em + os; us | ||||
|  | já             |  already, now | ||||
|  |   | está       from EST | ||||
|  | eu             |  I | ||||
|  | também         |  also | ||||
|  | só             |  only, just | ||||
|  | pelo           |  per + o | ||||
|  | pela           |  per + a | ||||
|  | até            |  up to | ||||
|  | isso           |  that | ||||
|  | ela            |  she | ||||
|  | entre          |  between | ||||
|  |   | era        from SER | ||||
|  | depois         |  after | ||||
|  | sem            |  without | ||||
|  | mesmo          |  same | ||||
|  | aos            |  a + os | ||||
|  |   | ter        from TER | ||||
|  | seus           |  his | ||||
|  | quem           |  whom | ||||
|  | nas            |  em + as | ||||
|  | me             |  me | ||||
|  | esse           |  that | ||||
|  | eles           |  they | ||||
|  |   | estão      from EST | ||||
|  | você           |  you | ||||
|  |   | tinha      from TER | ||||
|  |   | foram      from SER | ||||
|  | essa           |  that | ||||
|  | num            |  em + um | ||||
|  | nem            |  nor | ||||
|  | suas           |  her | ||||
|  | meu            |  my | ||||
|  | às             |  a + as | ||||
|  | minha          |  my | ||||
|  |   | têm        from TER | ||||
|  | numa           |  em + uma | ||||
|  | pelos          |  per + os | ||||
|  | elas           |  they | ||||
|  |   | havia      from HAV | ||||
|  |   | seja       from SER | ||||
|  | qual           |  which | ||||
|  |   | será       from SER | ||||
|  | nós            |  we | ||||
|  |   | tenho      from TER | ||||
|  | lhe            |  to him, her | ||||
|  | deles          |  of them | ||||
|  | essas          |  those | ||||
|  | esses          |  those | ||||
|  | pelas          |  per + as | ||||
|  | este           |  this | ||||
|  |   | fosse      from SER | ||||
|  | dele           |  of him | ||||
|  | 
 | ||||
|  |  | other words. There are many contractions such as naquele = em+aquele, | ||||
|  |  | mo = me+o, but they are rare. | ||||
|  |  | Indefinite article plural forms are also rare. | ||||
|  | 
 | ||||
|  | tu             |  thou | ||||
|  | te             |  thee | ||||
|  | vocês          |  you (plural) | ||||
|  | vos            |  you | ||||
|  | lhes           |  to them | ||||
|  | meus           |  my | ||||
|  | minhas | ||||
|  | teu            |  thy | ||||
|  | tua | ||||
|  | teus | ||||
|  | tuas | ||||
|  | nosso          | our | ||||
|  | nossa | ||||
|  | nossos | ||||
|  | nossas | ||||
|  | 
 | ||||
|  | dela           |  of her | ||||
|  | delas          |  of them | ||||
|  | 
 | ||||
|  | esta           |  this | ||||
|  | estes          |  these | ||||
|  | estas          |  these | ||||
|  | aquele         |  that | ||||
|  | aquela         |  that | ||||
|  | aqueles        |  those | ||||
|  | aquelas        |  those | ||||
|  | isto           |  this | ||||
|  | aquilo         |  that | ||||
|  | 
 | ||||
|  |                | forms of estar, to be (not including the infinitive): | ||||
|  | estou | ||||
|  | está | ||||
|  | estamos | ||||
|  | estão | ||||
|  | estive | ||||
|  | esteve | ||||
|  | estivemos | ||||
|  | estiveram | ||||
|  | estava | ||||
|  | estávamos | ||||
|  | estavam | ||||
|  | estivera | ||||
|  | estivéramos | ||||
|  | esteja | ||||
|  | estejamos | ||||
|  | estejam | ||||
|  | estivesse | ||||
|  | estivéssemos | ||||
|  | estivessem | ||||
|  | estiver | ||||
|  | estivermos | ||||
|  | estiverem | ||||
|  | 
 | ||||
|  |                | forms of haver, to have (not including the infinitive): | ||||
|  | hei | ||||
|  | há | ||||
|  | havemos | ||||
|  | hão | ||||
|  | houve | ||||
|  | houvemos | ||||
|  | houveram | ||||
|  | houvera | ||||
|  | houvéramos | ||||
|  | haja | ||||
|  | hajamos | ||||
|  | hajam | ||||
|  | houvesse | ||||
|  | houvéssemos | ||||
|  | houvessem | ||||
|  | houver | ||||
|  | houvermos | ||||
|  | houverem | ||||
|  | houverei | ||||
|  | houverá | ||||
|  | houveremos | ||||
|  | houverão | ||||
|  | houveria | ||||
|  | houveríamos | ||||
|  | houveriam | ||||
|  | 
 | ||||
|  |                | forms of ser, to be (not including the infinitive): | ||||
|  | sou | ||||
|  | somos | ||||
|  | são | ||||
|  | era | ||||
|  | éramos | ||||
|  | eram | ||||
|  | fui | ||||
|  | foi | ||||
|  | fomos | ||||
|  | foram | ||||
|  | fora | ||||
|  | fôramos | ||||
|  | seja | ||||
|  | sejamos | ||||
|  | sejam | ||||
|  | fosse | ||||
|  | fôssemos | ||||
|  | fossem | ||||
|  | for | ||||
|  | formos | ||||
|  | forem | ||||
|  | serei | ||||
|  | será | ||||
|  | seremos | ||||
|  | serão | ||||
|  | seria | ||||
|  | seríamos | ||||
|  | seriam | ||||
|  | 
 | ||||
|  |                | forms of ter, to have (not including the infinitive): | ||||
|  | tenho | ||||
|  | tem | ||||
|  | temos | ||||
|  | têm | ||||
|  | tinha | ||||
|  | tínhamos | ||||
|  | tinham | ||||
|  | tive | ||||
|  | teve | ||||
|  | tivemos | ||||
|  | tiveram | ||||
|  | tivera | ||||
|  | tivéramos | ||||
|  | tenha | ||||
|  | tenhamos | ||||
|  | tenham | ||||
|  | tivesse | ||||
|  | tivéssemos | ||||
|  | tivessem | ||||
|  | tiver | ||||
|  | tivermos | ||||
|  | tiverem | ||||
|  | terei | ||||
|  | terá | ||||
|  | teremos | ||||
|  | terão | ||||
|  | teria | ||||
|  | teríamos | ||||
|  | teriam | ||||
| @ -0,0 +1,573 @@ | |||||
|  | <?xml version="1.0" encoding="UTF-8" ?> | ||||
|  | <!-- | ||||
|  |  Licensed to the Apache Software Foundation (ASF) under one or more | ||||
|  |  contributor license agreements.  See the NOTICE file distributed with | ||||
|  |  this work for additional information regarding copyright ownership. | ||||
|  |  The ASF licenses this file to You under the Apache License, Version 2.0 | ||||
|  |  (the "License"); you may not use this file except in compliance with | ||||
|  |  the License.  You may obtain a copy of the License at | ||||
|  | 
 | ||||
|  |      http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | 
 | ||||
|  |  Unless required by applicable law or agreed to in writing, software | ||||
|  |  distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  |  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  |  See the License for the specific language governing permissions and | ||||
|  |  limitations under the License. | ||||
|  | --> | ||||
|  | 
 | ||||
|  | <!-- | ||||
|  | 
 | ||||
|  |  This example schema is the recommended starting point for users. | ||||
|  |  It should be kept correct and concise, usable out-of-the-box. | ||||
|  | 
 | ||||
|  | 
 | ||||
|  |  For more information on how to customize this file, please see | ||||
|  |  http://lucene.apache.org/solr/guide/documents-fields-and-schema-design.html | ||||
|  | 
 | ||||
|  |  PERFORMANCE NOTE: this schema includes many optional features and should not | ||||
|  |  be used for benchmarking.  To improve performance one could | ||||
|  |   - set stored="false" for all fields possible (esp large fields) when you | ||||
|  |     only need to search on the field but don't need to return the original | ||||
|  |     value. | ||||
|  |   - set indexed="false" if you don't need to search on the field, but only | ||||
|  |     return the field as a result of searching on other indexed fields. | ||||
|  |   - remove all unneeded copyField statements | ||||
|  |   - for best index size and searching performance, set "indexed" to false | ||||
|  |     for all general text fields, use copyField to copy them to the | ||||
|  |     catchall "text" field, and use that for searching. | ||||
|  | --> | ||||
|  | 
 | ||||
|  | <schema name="default-config" version="1.6"> | ||||
|  |     <!-- attribute "name" is the name of this schema and is only used for display purposes. | ||||
|  |        version="x.y" is Solr's version number for the schema syntax and  | ||||
|  |        semantics.  It should not normally be changed by applications. | ||||
|  | 
 | ||||
|  |        1.0: multiValued attribute did not exist, all fields are multiValued  | ||||
|  |             by nature | ||||
|  |        1.1: multiValued attribute introduced, false by default  | ||||
|  |        1.2: omitTermFreqAndPositions attribute introduced, true by default  | ||||
|  |             except for text fields. | ||||
|  |        1.3: removed optional field compress feature | ||||
|  |        1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser | ||||
|  |             behavior when a single string produces multiple tokens.  Defaults  | ||||
|  |             to off for version >= 1.4 | ||||
|  |        1.5: omitNorms defaults to true for primitive field types  | ||||
|  |             (int, float, boolean, string...) | ||||
|  |        1.6: useDocValuesAsStored defaults to true. | ||||
|  |     --> | ||||
|  | 
 | ||||
|  |     <!-- Valid attributes for fields: | ||||
|  |      name: mandatory - the name for the field | ||||
|  |      type: mandatory - the name of a field type from the  | ||||
|  |        fieldTypes section | ||||
|  |      indexed: true if this field should be indexed (searchable or sortable) | ||||
|  |      stored: true if this field should be retrievable | ||||
|  |      docValues: true if this field should have doc values. Doc Values is | ||||
|  |        recommended (required, if you are using *Point fields) for faceting, | ||||
|  |        grouping, sorting and function queries. Doc Values will make the index | ||||
|  |        faster to load, more NRT-friendly and more memory-efficient.  | ||||
|  |        They are currently only supported by StrField, UUIDField, all  | ||||
|  |        *PointFields, and depending on the field type, they might require | ||||
|  |        the field to be single-valued, be required or have a default value | ||||
|  |        (check the documentation of the field type you're interested in for | ||||
|  |        more information) | ||||
|  |      multiValued: true if this field may contain multiple values per document | ||||
|  |      omitNorms: (expert) set to true to omit the norms associated with | ||||
|  |        this field (this disables length normalization and index-time | ||||
|  |        boosting for the field, and saves some memory).  Only full-text | ||||
|  |        fields or fields that need an index-time boost need norms. | ||||
|  |        Norms are omitted for primitive (non-analyzed) types by default. | ||||
|  |      termVectors: [false] set to true to store the term vector for a | ||||
|  |        given field. | ||||
|  |        When using MoreLikeThis, fields used for similarity should be | ||||
|  |        stored for best performance. | ||||
|  |      termPositions: Store position information with the term vector.   | ||||
|  |        This will increase storage costs. | ||||
|  |      termOffsets: Store offset information with the term vector. This  | ||||
|  |        will increase storage costs. | ||||
|  |      required: The field is required.  It will throw an error if the | ||||
|  |        value does not exist | ||||
|  |      default: a value that should be used if no value is specified | ||||
|  |        when adding a document. | ||||
|  |     --> | ||||
|  | 
 | ||||
|  |     <!-- field names should consist of alphanumeric or underscore characters only and | ||||
|  |       not start with a digit.  This is not currently strictly enforced, | ||||
|  |       but other field names will not have first class support from all components | ||||
|  |       and back compatibility is not guaranteed.  Names with both leading and | ||||
|  |       trailing underscores (e.g. _version_) are reserved. | ||||
|  |     --> | ||||
|  | 
 | ||||
|  |     <!-- In this _default configset, four standard fields are pre-declared: | ||||
|  |          id, _version_, _text_ and _root_ (the Django fields further below are declared in addition). | ||||
|  |          All other fields will be type guessed and added via the | ||||
|  |          "add-unknown-fields-to-the-schema" update request processor chain declared in solrconfig.xml. | ||||
|  |           | ||||
|  |          Note that many dynamic fields are also defined - you can use them to specify a  | ||||
|  |          field's type via field naming conventions - see below. | ||||
|  |    | ||||
|  |          WARNING: The _text_ catch-all field will significantly increase your index size. | ||||
|  |          If you don't need it, consider removing it and the corresponding copyField directive. | ||||
|  |     --> | ||||
|  | 
 | ||||
|  |     <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /> | ||||
|  |     <!-- docValues are enabled by default for long type so we don't need to index the version field  --> | ||||
|  |     <field name="_version_" type="plong" indexed="false" stored="false"/> | ||||
|  |     <field name="_root_" type="string" indexed="true" stored="false" docValues="false" /> | ||||
|  |     <field name="_text_" type="text_general" indexed="true" stored="false" multiValued="true"/> | ||||
|  | 
 | ||||
|  |     <!-- Django fields --> | ||||
|  |     <field name="django_ct" type="string" indexed="true" stored="true" multiValued="false"/> | ||||
|  |     <field name="django_id" type="string" indexed="true" stored="true" multiValued="false"/> | ||||
|  |     <field name="text" type="text_pt" indexed="true" stored="true" multiValued="false" /> | ||||
|  | 
 | ||||
|  |     <!-- This can be enabled, in case the client does not know what fields may be searched. It isn't enabled by default | ||||
|  |          because it's very expensive to index everything twice. --> | ||||
|  |     <!-- <copyField source="*" dest="_text_"/> --> | ||||
|  | 
 | ||||
|  |     <!-- Dynamic field definitions allow using convention over configuration | ||||
|  |        for fields via the specification of patterns to match field names. | ||||
|  |        EXAMPLE:  name="*_i" will match any field ending in _i (like myid_i, z_i) | ||||
|  |        RESTRICTION: the glob-like pattern in the name attribute must have a "*" only at the start or the end.  --> | ||||
|  |     | ||||
|  |     <dynamicField name="*_i"  type="pint"    indexed="true"  stored="true"/> | ||||
|  |     <dynamicField name="*_is" type="pints"    indexed="true"  stored="true"/> | ||||
|  |     <dynamicField name="*_s"  type="string"  indexed="true"  stored="true" /> | ||||
|  |     <dynamicField name="*_ss" type="strings"  indexed="true"  stored="true"/> | ||||
|  |     <dynamicField name="*_l"  type="plong"   indexed="true"  stored="true"/> | ||||
|  |     <dynamicField name="*_ls" type="plongs"   indexed="true"  stored="true"/> | ||||
|  |     <dynamicField name="*_t" type="text_general" indexed="true" stored="true" multiValued="false"/> | ||||
|  |     <dynamicField name="*_txt" type="text_general" indexed="true" stored="true"/> | ||||
|  |     <dynamicField name="*_b"  type="boolean" indexed="true" stored="true"/> | ||||
|  |     <dynamicField name="*_bs" type="booleans" indexed="true" stored="true"/> | ||||
|  |     <dynamicField name="*_f"  type="pfloat"  indexed="true"  stored="true"/> | ||||
|  |     <dynamicField name="*_fs" type="pfloats"  indexed="true"  stored="true"/> | ||||
|  |     <dynamicField name="*_d"  type="pdouble" indexed="true"  stored="true"/> | ||||
|  |     <dynamicField name="*_ds" type="pdoubles" indexed="true"  stored="true"/> | ||||
|  | 
 | ||||
|  |     <!-- Type used for data-driven schema, to add a string copy for each text field --> | ||||
|  |     <dynamicField name="*_str" type="strings" stored="false" docValues="true" indexed="false" /> | ||||
|  | 
 | ||||
|  |     <dynamicField name="*_dt"  type="pdate"    indexed="true"  stored="true"/> | ||||
|  |     <dynamicField name="*_dts" type="pdate"    indexed="true"  stored="true" multiValued="true"/> | ||||
|  |     <dynamicField name="*_p"  type="location" indexed="true" stored="true"/> | ||||
|  |     <dynamicField name="*_srpt"  type="location_rpt" indexed="true" stored="true"/> | ||||
|  |      | ||||
|  |     <!-- payloaded dynamic fields --> | ||||
|  |     <dynamicField name="*_dpf" type="delimited_payloads_float" indexed="true"  stored="true"/> | ||||
|  |     <dynamicField name="*_dpi" type="delimited_payloads_int" indexed="true"  stored="true"/> | ||||
|  |     <dynamicField name="*_dps" type="delimited_payloads_string" indexed="true"  stored="true"/> | ||||
|  | 
 | ||||
|  |     <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/> | ||||
|  | 
 | ||||
|  |     <!-- Field to use to determine and enforce document uniqueness. | ||||
|  |       Unless this field is marked with required="false", it will be a required field | ||||
|  |     --> | ||||
|  |     <uniqueKey>id</uniqueKey> | ||||
|  | 
 | ||||
|  |     <!-- copyField commands copy one field to another at the time a document | ||||
|  |        is added to the index.  It's used either to index the same field differently, | ||||
|  |        or to add multiple fields to the same field for easier/faster searching. | ||||
|  | 
 | ||||
|  |     <copyField source="sourceFieldName" dest="destinationFieldName"/> | ||||
|  |     --> | ||||
|  | 
 | ||||
|  |     <!-- field type definitions. The "name" attribute is | ||||
|  |        just a label to be used by field definitions.  The "class" | ||||
|  |        attribute and any other attributes determine the real | ||||
|  |        behavior of the fieldType. | ||||
|  |          Class names starting with "solr" refer to java classes in a | ||||
|  |        standard package such as org.apache.solr.analysis | ||||
|  |     --> | ||||
|  | 
 | ||||
|  |     <!-- sortMissingLast and sortMissingFirst are optional attributes, | ||||
|  |          currently supported on types that are sorted internally as strings | ||||
|  |          and on numeric types. | ||||
|  |        This includes "string", "boolean", "pint", "pfloat", "plong", "pdate", "pdouble". | ||||
|  |        - If sortMissingLast="true", then a sort on this field will cause documents | ||||
|  |          without the field to come after documents with the field, | ||||
|  |          regardless of the requested sort order (asc or desc). | ||||
|  |        - If sortMissingFirst="true", then a sort on this field will cause documents | ||||
|  |          without the field to come before documents with the field, | ||||
|  |          regardless of the requested sort order. | ||||
|  |        - If sortMissingLast="false" and sortMissingFirst="false" (the default), | ||||
|  |          then default lucene sorting will be used which places docs without the | ||||
|  |          field first in an ascending sort and last in a descending sort. | ||||
|  |     --> | ||||
|  | 
 | ||||
|  |     <!-- The StrField type is not analyzed, but indexed/stored verbatim. --> | ||||
|  |     <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true" /> | ||||
|  |     <fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" /> | ||||
|  | 
 | ||||
|  |     <!-- boolean type: "true" or "false" --> | ||||
|  |     <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> | ||||
|  |     <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/> | ||||
|  | 
 | ||||
|  |     <!-- | ||||
|  |       Numeric field types that index values using KD-trees. | ||||
|  |       Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc. | ||||
|  |     --> | ||||
|  |     <fieldType name="pint" class="solr.IntPointField" docValues="true"/> | ||||
|  |     <fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/> | ||||
|  |     <fieldType name="plong" class="solr.LongPointField" docValues="true"/> | ||||
|  |     <fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/> | ||||
|  |      | ||||
|  |     <fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/> | ||||
|  |     <fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/> | ||||
|  |     <fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/> | ||||
|  |     <fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/> | ||||
|  | 
 | ||||
|  |     <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and | ||||
|  |          is a more restricted form of the canonical representation of dateTime | ||||
|  |          http://www.w3.org/TR/xmlschema-2/#dateTime     | ||||
|  |          The trailing "Z" designates UTC time and is mandatory. | ||||
|  |          Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z | ||||
|  |          All other components are mandatory. | ||||
|  | 
 | ||||
|  |          Expressions can also be used to denote calculations that should be | ||||
|  |          performed relative to "NOW" to determine the value, ie... | ||||
|  | 
 | ||||
|  |                NOW/HOUR | ||||
|  |                   ... Round to the start of the current hour | ||||
|  |                NOW-1DAY | ||||
|  |                   ... Exactly 1 day prior to now | ||||
|  |                NOW/DAY+6MONTHS+3DAYS | ||||
|  |                   ... 6 months and 3 days in the future from the start of | ||||
|  |                       the current day | ||||
|  |                        | ||||
|  |       --> | ||||
|  |     <!-- KD-tree versions of date fields --> | ||||
|  |     <fieldType name="pdate" class="solr.DatePointField" docValues="true"/> | ||||
|  |     <fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/> | ||||
|  |      | ||||
|  |     <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings --> | ||||
|  |     <fieldType name="binary" class="solr.BinaryField"/> | ||||
|  | 
 | ||||
|  |     <!-- solr.TextField allows the specification of custom text analyzers | ||||
|  |          specified as a tokenizer and a list of token filters. Different | ||||
|  |          analyzers may be specified for indexing and querying. | ||||
|  | 
 | ||||
|  |          The optional positionIncrementGap puts space between multiple fields of | ||||
|  |          this type on the same document, with the purpose of preventing false phrase | ||||
|  |          matching across fields. | ||||
|  | 
 | ||||
|  |          For more info on customizing your analyzer chain, please see | ||||
|  |          http://lucene.apache.org/solr/guide/understanding-analyzers-tokenizers-and-filters.html#understanding-analyzers-tokenizers-and-filters | ||||
|  |      --> | ||||
|  | 
 | ||||
|  |     <!-- One can also specify an existing Analyzer class that has a | ||||
|  |          default constructor via the class attribute on the analyzer element. | ||||
|  |          Example: | ||||
|  |     <fieldType name="text_greek" class="solr.TextField"> | ||||
|  |       <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/> | ||||
|  |     </fieldType> | ||||
|  |     --> | ||||
|  | 
 | ||||
|  |     <!-- A text field that only splits on whitespace for exact matching of words --> | ||||
|  |     <dynamicField name="*_ws" type="text_ws"  indexed="true"  stored="true"/> | ||||
|  |     <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> | ||||
|  |       <analyzer> | ||||
|  |         <tokenizer class="solr.WhitespaceTokenizerFactory"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |     <!-- A general text field that has reasonable, generic | ||||
|  |          cross-language defaults: it tokenizes with StandardTokenizer, | ||||
|  | 	       removes stop words from case-insensitive "stopwords.txt" | ||||
|  | 	       (empty by default), and down cases.  At query time only, it | ||||
|  | 	       also applies synonyms. | ||||
|  | 	  --> | ||||
|  |     <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true"> | ||||
|  |       <analyzer type="index"> | ||||
|  |         <tokenizer class="solr.StandardTokenizerFactory"/> | ||||
|  |         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> | ||||
|  |         <!-- in this example, we will only use synonyms at query time | ||||
|  |         <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> | ||||
|  |         <filter class="solr.FlattenGraphFilterFactory"/> | ||||
|  |         --> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |       </analyzer> | ||||
|  |       <analyzer type="query"> | ||||
|  |         <tokenizer class="solr.StandardTokenizerFactory"/> | ||||
|  |         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> | ||||
|  |         <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |      | ||||
|  |     <!-- SortableTextField generally functions exactly like TextField, | ||||
|  |          except that it supports, and by default uses, docValues for sorting (or faceting) | ||||
|  |          on the first 1024 characters of the original field values (which is configurable). | ||||
|  |           | ||||
|  |          This makes it a bit more useful than TextField in many situations, but the trade-off | ||||
|  |          is that it takes up more space on disk; which is why it's not used in place of TextField | ||||
|  |          for every fieldType in this _default schema. | ||||
|  | 	  --> | ||||
|  |     <dynamicField name="*_t_sort" type="text_gen_sort" indexed="true" stored="true" multiValued="false"/> | ||||
|  |     <dynamicField name="*_txt_sort" type="text_gen_sort" indexed="true" stored="true"/> | ||||
|  |     <fieldType name="text_gen_sort" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true"> | ||||
|  |       <analyzer type="index"> | ||||
|  |         <tokenizer class="solr.StandardTokenizerFactory"/> | ||||
|  |         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |       </analyzer> | ||||
|  |       <analyzer type="query"> | ||||
|  |         <tokenizer class="solr.StandardTokenizerFactory"/> | ||||
|  |         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> | ||||
|  |         <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |     <!-- A text field with defaults appropriate for English: it tokenizes with StandardTokenizer, | ||||
|  |          removes English stop words (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and | ||||
|  |          finally applies Porter's stemming.  The query time analyzer also applies synonyms from synonyms.txt. --> | ||||
|  |     <dynamicField name="*_txt_en" type="text_en"  indexed="true"  stored="true"/> | ||||
|  |     <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> | ||||
|  |       <analyzer type="index"> | ||||
|  |         <tokenizer class="solr.StandardTokenizerFactory"/> | ||||
|  |         <!-- in this example, we will only use synonyms at query time | ||||
|  |         <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> | ||||
|  |         <filter class="solr.FlattenGraphFilterFactory"/> | ||||
|  |         --> | ||||
|  |         <!-- Case insensitive stop word removal. | ||||
|  |         --> | ||||
|  |         <filter class="solr.StopFilterFactory" | ||||
|  |                 ignoreCase="true" | ||||
|  |                 words="lang/stopwords_en.txt" | ||||
|  |             /> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |         <filter class="solr.EnglishPossessiveFilterFactory"/> | ||||
|  |         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | ||||
|  |         <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: | ||||
|  |         <filter class="solr.EnglishMinimalStemFilterFactory"/> | ||||
|  | 	      --> | ||||
|  |         <filter class="solr.PorterStemFilterFactory"/> | ||||
|  |       </analyzer> | ||||
|  |       <analyzer type="query"> | ||||
|  |         <tokenizer class="solr.StandardTokenizerFactory"/> | ||||
|  |         <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> | ||||
|  |         <filter class="solr.StopFilterFactory" | ||||
|  |                 ignoreCase="true" | ||||
|  |                 words="lang/stopwords_en.txt" | ||||
|  |         /> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |         <filter class="solr.EnglishPossessiveFilterFactory"/> | ||||
|  |         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | ||||
|  |         <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: | ||||
|  |         <filter class="solr.EnglishMinimalStemFilterFactory"/> | ||||
|  | 	      --> | ||||
|  |         <filter class="solr.PorterStemFilterFactory"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |     <!-- A text field with defaults appropriate for English, plus | ||||
|  |          aggressive word-splitting and autophrase features enabled. | ||||
|  |          This field is just like text_en, except it adds | ||||
|  |          WordDelimiterGraphFilter to enable splitting and matching of | ||||
|  |          words on case-change, alpha numeric boundaries, and | ||||
|  |          non-alphanumeric chars.  This means certain compound word | ||||
|  |          cases will work, for example query "wi fi" will match | ||||
|  |          document "WiFi" or "wi-fi". | ||||
|  |     --> | ||||
|  |     <dynamicField name="*_txt_en_split" type="text_en_splitting"  indexed="true"  stored="true"/> | ||||
|  |     <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> | ||||
|  |       <analyzer type="index"> | ||||
|  |         <tokenizer class="solr.WhitespaceTokenizerFactory"/> | ||||
|  |         <!-- in this example, we will only use synonyms at query time | ||||
|  |         <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> | ||||
|  |         --> | ||||
|  |         <!-- Case insensitive stop word removal. | ||||
|  |         --> | ||||
|  |         <filter class="solr.StopFilterFactory" | ||||
|  |                 ignoreCase="true" | ||||
|  |                 words="lang/stopwords_en.txt" | ||||
|  |         /> | ||||
|  |         <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | ||||
|  |         <filter class="solr.PorterStemFilterFactory"/> | ||||
|  |         <filter class="solr.FlattenGraphFilterFactory" /> | ||||
|  |       </analyzer> | ||||
|  |       <analyzer type="query"> | ||||
|  |         <tokenizer class="solr.WhitespaceTokenizerFactory"/> | ||||
|  |         <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> | ||||
|  |         <filter class="solr.StopFilterFactory" | ||||
|  |                 ignoreCase="true" | ||||
|  |                 words="lang/stopwords_en.txt" | ||||
|  |         /> | ||||
|  |         <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | ||||
|  |         <filter class="solr.PorterStemFilterFactory"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |     <!-- Less flexible matching, but fewer false matches.  Probably not ideal for product names, | ||||
|  |          but may be good for SKUs.  Can insert dashes in the wrong place and still match. --> | ||||
|  |     <dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight"  indexed="true"  stored="true"/> | ||||
|  |     <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> | ||||
|  |       <analyzer type="index"> | ||||
|  |         <tokenizer class="solr.WhitespaceTokenizerFactory"/> | ||||
|  |         <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> | ||||
|  |         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> | ||||
|  |         <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | ||||
|  |         <filter class="solr.EnglishMinimalStemFilterFactory"/> | ||||
|  |         <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes | ||||
|  |              possible with WordDelimiterGraphFilter in conjunction with stemming. --> | ||||
|  |         <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | ||||
|  |         <filter class="solr.FlattenGraphFilterFactory" /> | ||||
|  |       </analyzer> | ||||
|  |       <analyzer type="query"> | ||||
|  |         <tokenizer class="solr.WhitespaceTokenizerFactory"/> | ||||
|  |         <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> | ||||
|  |         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> | ||||
|  |         <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | ||||
|  |         <filter class="solr.EnglishMinimalStemFilterFactory"/> | ||||
|  |         <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes | ||||
|  |              possible with WordDelimiterGraphFilter in conjunction with stemming. --> | ||||
|  |         <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |     <!-- Just like text_general except it reverses the characters of | ||||
|  | 	       each token, to enable more efficient leading wildcard queries. | ||||
|  |     --> | ||||
|  |     <dynamicField name="*_txt_rev" type="text_general_rev"  indexed="true"  stored="true"/> | ||||
|  |     <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> | ||||
|  |       <analyzer type="index"> | ||||
|  |         <tokenizer class="solr.StandardTokenizerFactory"/> | ||||
|  |         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |         <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" | ||||
|  |                 maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> | ||||
|  |       </analyzer> | ||||
|  |       <analyzer type="query"> | ||||
|  |         <tokenizer class="solr.StandardTokenizerFactory"/> | ||||
|  |         <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> | ||||
|  |         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |     <dynamicField name="*_phon_en" type="phonetic_en"  indexed="true"  stored="true"/> | ||||
|  |     <fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" > | ||||
|  |       <analyzer> | ||||
|  |         <tokenizer class="solr.StandardTokenizerFactory"/> | ||||
|  |         <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |     <!-- lowercases the entire field value, keeping it as a single token.  --> | ||||
|  |     <dynamicField name="*_s_lower" type="lowercase"  indexed="true"  stored="true"/> | ||||
|  |     <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> | ||||
|  |       <analyzer> | ||||
|  |         <tokenizer class="solr.KeywordTokenizerFactory"/> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory" /> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |     <!--  | ||||
|  |       Example of using PathHierarchyTokenizerFactory at index time, so | ||||
|  |       queries for paths match documents at that path, or in descendent paths | ||||
|  |     --> | ||||
|  |     <dynamicField name="*_descendent_path" type="descendent_path"  indexed="true"  stored="true"/> | ||||
|  |     <fieldType name="descendent_path" class="solr.TextField"> | ||||
|  |       <analyzer type="index"> | ||||
|  |         <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> | ||||
|  |       </analyzer> | ||||
|  |       <analyzer type="query"> | ||||
|  |         <tokenizer class="solr.KeywordTokenizerFactory" /> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |     <!-- | ||||
|  |       Example of using PathHierarchyTokenizerFactory at query time, so | ||||
|  |       queries for paths match documents at that path, or in ancestor paths | ||||
|  |     --> | ||||
|  |     <dynamicField name="*_ancestor_path" type="ancestor_path"  indexed="true"  stored="true"/> | ||||
|  |     <fieldType name="ancestor_path" class="solr.TextField"> | ||||
|  |       <analyzer type="index"> | ||||
|  |         <tokenizer class="solr.KeywordTokenizerFactory" /> | ||||
|  |       </analyzer> | ||||
|  |       <analyzer type="query"> | ||||
|  |         <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |     <!-- This point type indexes the coordinates as separate fields (subFields) | ||||
|  |       If subFieldType is defined, it references a type, and a dynamic field | ||||
|  |       definition is created matching *___<typename>.  Alternately, if  | ||||
|  |       subFieldSuffix is defined, that is used to create the subFields. | ||||
|  |       Example: if subFieldType="double", then the coordinates would be | ||||
|  |         indexed in fields myloc_0___double,myloc_1___double. | ||||
|  |       Example: if subFieldSuffix="_d" then the coordinates would be indexed | ||||
|  |         in fields myloc_0_d,myloc_1_d | ||||
|  |       The subFields are an implementation detail of the fieldType, and end | ||||
|  |       users normally should not need to know about them. | ||||
|  |      --> | ||||
|  |     <dynamicField name="*_point" type="point"  indexed="true"  stored="true"/> | ||||
|  |     <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/> | ||||
|  | 
 | ||||
|  |     <!-- A specialized field for geospatial search filters and distance sorting. --> | ||||
|  |     <fieldType name="location" class="solr.LatLonPointSpatialField" docValues="true"/> | ||||
|  | 
 | ||||
|  |     <!-- A geospatial field type that supports multiValued and polygon shapes. | ||||
|  |       For more information about this and other spatial fields see: | ||||
|  |       http://lucene.apache.org/solr/guide/spatial-search.html | ||||
|  |     --> | ||||
|  |     <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" | ||||
|  |                geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" /> | ||||
|  | 
 | ||||
|  |     <!-- Payloaded field types --> | ||||
|  |     <fieldType name="delimited_payloads_float" stored="false" indexed="true" class="solr.TextField"> | ||||
|  |       <analyzer> | ||||
|  |         <tokenizer class="solr.WhitespaceTokenizerFactory"/> | ||||
|  |         <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  |     <fieldType name="delimited_payloads_int" stored="false" indexed="true" class="solr.TextField"> | ||||
|  |       <analyzer> | ||||
|  |         <tokenizer class="solr.WhitespaceTokenizerFactory"/> | ||||
|  |         <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="integer"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  |     <fieldType name="delimited_payloads_string" stored="false" indexed="true" class="solr.TextField"> | ||||
|  |       <analyzer> | ||||
|  |         <tokenizer class="solr.WhitespaceTokenizerFactory"/> | ||||
|  |         <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="identity"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |   <!-- Portuguese --> | ||||
|  |   <dynamicField name="*_txt_pt" type="text_pt"  indexed="true"  stored="true"/> | ||||
|  |   <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> | ||||
|  |       <analyzer> | ||||
|  |         <charFilter class="solr.HTMLStripCharFilterFactory"/>  | ||||
|  |         <tokenizer class="solr.StandardTokenizerFactory"/> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" /> | ||||
|  |         <filter class="solr.PortugueseLightStemFilterFactory"/> | ||||
|  |         <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> | ||||
|  |         <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> | ||||
|  |         <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> | ||||
|  |       </analyzer> | ||||
|  |   </fieldType> | ||||
|  |      | ||||
|  |     <!-- Similarity is the scoring routine for each document vs. a query. | ||||
|  |        A custom Similarity or SimilarityFactory may be specified here, but  | ||||
|  |        the default is fine for most applications.   | ||||
|  |        For more info: http://lucene.apache.org/solr/guide/other-schema-elements.html#OtherSchemaElements-Similarity | ||||
|  |     --> | ||||
|  |     <!-- | ||||
|  |      <similarity class="com.example.solr.CustomSimilarityFactory"> | ||||
|  |        <str name="paramkey">param value</str> | ||||
|  |      </similarity> | ||||
|  |     --> | ||||
|  | 
 | ||||
|  | </schema> | ||||
| @ -0,0 +1,20 @@ | |||||
|  | {"params":{ | ||||
|  |   "query":{ | ||||
|  |     "defType":"edismax", | ||||
|  |     "q.alt":"*:*", | ||||
|  |     "rows":"10", | ||||
|  |     "fl":"*,score", | ||||
|  |     "":{"v":0} | ||||
|  |   }, | ||||
|  |   "facets":{ | ||||
|  |     "facet":"on", | ||||
|  |     "facet.mincount": "1", | ||||
|  |     "":{"v":0} | ||||
|  |   }, | ||||
|  |  "velocity":{ | ||||
|  |    "wt": "velocity", | ||||
|  |    "v.template":"browse", | ||||
|  |    "v.layout": "layout", | ||||
|  |    "":{"v":0} | ||||
|  |  } | ||||
|  | }} | ||||
| @ -0,0 +1,21 @@ | |||||
|  | # The ASF licenses this file to You under the Apache License, Version 2.0 | ||||
|  | # (the "License"); you may not use this file except in compliance with | ||||
|  | # the License.  You may obtain a copy of the License at | ||||
|  | # | ||||
|  | #     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | # | ||||
|  | # Unless required by applicable law or agreed to in writing, software | ||||
|  | # distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  | # See the License for the specific language governing permissions and | ||||
|  | # limitations under the License. | ||||
|  | 
 | ||||
|  | #----------------------------------------------------------------------- | ||||
|  | # Use a protected word file to protect against the stemmer reducing two | ||||
|  | # unrelated words to the same base word. | ||||
|  | 
 | ||||
|  | # Some non-words that normally won't be encountered, | ||||
|  | # just to test that they won't be stemmed. | ||||
|  | dontstems | ||||
|  | zwhacky | ||||
|  | 
 | ||||
								
									Binary file not shown.
								
							
						
					| @ -0,0 +1,165 @@ | |||||
|  | <?xml version="1.0" ?> | ||||
|  | <!-- | ||||
|  |  Licensed to the Apache Software Foundation (ASF) under one or more | ||||
|  |  contributor license agreements.  See the NOTICE file distributed with | ||||
|  |  this work for additional information regarding copyright ownership. | ||||
|  |  The ASF licenses this file to You under the Apache License, Version 2.0 | ||||
|  |  (the "License"); you may not use this file except in compliance with | ||||
|  |  the License.  You may obtain a copy of the License at | ||||
|  | 
 | ||||
|  |      http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | 
 | ||||
|  |  Unless required by applicable law or agreed to in writing, software | ||||
|  |  distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  |  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  |  See the License for the specific language governing permissions and | ||||
|  |  limitations under the License. | ||||
|  | --> | ||||
|  | 
 | ||||
|  | <schema name="default" version="1.6"> | ||||
|  |   <types> | ||||
|  |     <fieldtype name="string"  class="solr.StrField" sortMissingLast="true" omitNorms="true"/> | ||||
|  |     <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/> | ||||
|  |     <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/> | ||||
|  |     <fieldtype name="binary" class="solr.BinaryField"/> | ||||
|  | 
 | ||||
|  |     <!-- Numeric field types that index values using KD-trees (point fields). | ||||
|  |          Point fields don't support FieldCache, so they must have | ||||
|  |          docValues="true" if needed for sorting, faceting, functions, etc. | ||||
|  |          Range queries on these types work on the numeric values directly. --> | ||||
|  |     <fieldType name="pint" class="solr.IntPointField" docValues="true" /> | ||||
|  |     <fieldType name="pfloat" class="solr.FloatPointField" docValues="true" /> | ||||
|  |     <fieldType name="plong" class="solr.LongPointField" docValues="true" /> | ||||
|  |     <fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/> | ||||
|  | 
 | ||||
|  | 
 | ||||
|  |     <fieldType name="pdate" class="solr.DatePointField" docValues="true" /> | ||||
|  |     <!-- A Trie based date field for faster date range queries and date faceting. --> | ||||
|  | 
 | ||||
|  |     <fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/> | ||||
|  |     <fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/> | ||||
|  |     <fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/> | ||||
|  |     <fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/> | ||||
|  |     <fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/> | ||||
|  |      | ||||
|  | 
 | ||||
|  |     <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/> | ||||
|  |     <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/> | ||||
|  |     <fieldtype name="geohash" class="solr.GeoHashField"/> | ||||
|  | 
 | ||||
|  |     <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> | ||||
|  |       <analyzer type="index"> | ||||
|  |         <tokenizer class="solr.StandardTokenizerFactory"/> | ||||
|  |         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> | ||||
|  |         <!-- in this example, we will only use synonyms at query time | ||||
|  |         <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> | ||||
|  |         --> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |       </analyzer> | ||||
|  |       <analyzer type="query"> | ||||
|  |         <tokenizer class="solr.StandardTokenizerFactory"/> | ||||
|  |         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> | ||||
|  |         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |     <!-- Portuguese --> | ||||
|  |     <dynamicField name="*_txt_pt" type="text_pt"  indexed="true"  stored="true"/> | ||||
|  |     <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> | ||||
|  |       <analyzer> | ||||
|  |         <tokenizer class="solr.StandardTokenizerFactory"/> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |         <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" /> | ||||
|  |         <filter class="solr.PortugueseLightStemFilterFactory"/> | ||||
|  |         <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> | ||||
|  |         <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> | ||||
|  |         <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  | 
 | ||||
|  |     <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> | ||||
|  |       <analyzer type="index"> | ||||
|  |         <tokenizer class="solr.StandardTokenizerFactory"/> | ||||
|  |         <filter class="solr.StopFilterFactory" | ||||
|  |                 ignoreCase="true" | ||||
|  |                 words="lang/stopwords_en.txt" | ||||
|  |                 /> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |         <filter class="solr.EnglishPossessiveFilterFactory"/> | ||||
|  |         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | ||||
|  |         <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: | ||||
|  |           <filter class="solr.EnglishMinimalStemFilterFactory"/> | ||||
|  |         --> | ||||
|  |         <filter class="solr.PorterStemFilterFactory"/> | ||||
|  |       </analyzer> | ||||
|  |       <analyzer type="query"> | ||||
|  |         <tokenizer class="solr.StandardTokenizerFactory"/> | ||||
|  |         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> | ||||
|  |         <filter class="solr.StopFilterFactory" | ||||
|  |                 ignoreCase="true" | ||||
|  |                 words="lang/stopwords_en.txt" | ||||
|  |                 /> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |         <filter class="solr.EnglishPossessiveFilterFactory"/> | ||||
|  |         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> | ||||
|  |         <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: | ||||
|  |           <filter class="solr.EnglishMinimalStemFilterFactory"/> | ||||
|  |         --> | ||||
|  |         <filter class="solr.PorterStemFilterFactory"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |     <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> | ||||
|  |       <analyzer> | ||||
|  |         <tokenizer class="solr.WhitespaceTokenizerFactory"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |     <fieldType name="ngram" class="solr.TextField" > | ||||
|  |       <analyzer type="index"> | ||||
|  |         <tokenizer class="solr.KeywordTokenizerFactory"/> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |         <filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="15" /> | ||||
|  |       </analyzer> | ||||
|  |       <analyzer type="query"> | ||||
|  |         <tokenizer class="solr.KeywordTokenizerFactory"/> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  | 
 | ||||
|  |     <fieldType name="edge_ngram" class="solr.TextField" positionIncrementGap="1"> | ||||
|  |       <analyzer type="index"> | ||||
|  |         <tokenizer class="solr.WhitespaceTokenizerFactory" /> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory" /> | ||||
|  |         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> | ||||
|  |         <filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="15" /> | ||||
|  |       </analyzer> | ||||
|  |       <analyzer type="query"> | ||||
|  |         <tokenizer class="solr.WhitespaceTokenizerFactory" /> | ||||
|  |         <filter class="solr.LowerCaseFilterFactory" /> | ||||
|  |         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> | ||||
|  |       </analyzer> | ||||
|  |     </fieldType> | ||||
|  |   </types> | ||||
|  | 
 | ||||
|  |   <fields> | ||||
|  |     <!-- general --> | ||||
|  |     <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/> | ||||
|  |     <field name="django_ct" type="string" indexed="true" stored="true" multiValued="false"/> | ||||
|  |     <field name="django_id" type="string" indexed="true" stored="true" multiValued="false"/> | ||||
|  |     <field name="_version_" type="plong" indexed="true" stored="true"/> | ||||
|  |     <field name="text" type="text_pt" indexed="true" stored="true" multiValued="false" /> | ||||
|  | 
 | ||||
|  |   </fields> | ||||
|  | 
 | ||||
|  |   <!-- field to use to determine and enforce document uniqueness. --> | ||||
|  |   <uniqueKey>id</uniqueKey> | ||||
|  | 
 | ||||
|  |   <!-- field for the QueryParser to use when an explicit fieldname is absent --> | ||||
|  |   <df>text</df> | ||||
|  | 
 | ||||
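|  |   <!-- SolrQueryParser configuration: q.op="AND|OR". NOTE(review): Solr 7+ takes the default operator from the q.op request parameter rather than the schema; confirm it is also set in the request handler defaults. --> | ||||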
|  |   <solrQueryParser q.op="AND"/> | ||||
|  | </schema> | ||||
								
									
										File diff suppressed because it is too large
									
								
							
						
					| @ -0,0 +1,14 @@ | |||||
|  | # Licensed to the Apache Software Foundation (ASF) under one or more | ||||
|  | # contributor license agreements.  See the NOTICE file distributed with | ||||
|  | # this work for additional information regarding copyright ownership. | ||||
|  | # The ASF licenses this file to You under the Apache License, Version 2.0 | ||||
|  | # (the "License"); you may not use this file except in compliance with | ||||
|  | # the License.  You may obtain a copy of the License at | ||||
|  | # | ||||
|  | #     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | # | ||||
|  | # Unless required by applicable law or agreed to in writing, software | ||||
|  | # distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  | # See the License for the specific language governing permissions and | ||||
|  | # limitations under the License. | ||||
| @ -0,0 +1,29 @@ | |||||
|  | # The ASF licenses this file to You under the Apache License, Version 2.0 | ||||
|  | # (the "License"); you may not use this file except in compliance with | ||||
|  | # the License.  You may obtain a copy of the License at | ||||
|  | # | ||||
|  | #     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | # | ||||
|  | # Unless required by applicable law or agreed to in writing, software | ||||
|  | # distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  | # See the License for the specific language governing permissions and | ||||
|  | # limitations under the License. | ||||
|  | 
 | ||||
|  | #----------------------------------------------------------------------- | ||||
|  | #some test synonym mappings unlikely to appear in real input text | ||||
|  | aaafoo => aaabar | ||||
|  | bbbfoo => bbbfoo bbbbar | ||||
|  | cccfoo => cccbar cccbaz | ||||
|  | fooaaa,baraaa,bazaaa | ||||
|  | 
 | ||||
|  | # Some synonym groups specific to this example | ||||
|  | GB,gib,gigabyte,gigabytes | ||||
|  | MB,mib,megabyte,megabytes | ||||
|  | Television, Televisions, TV, TVs | ||||
|  | #notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming | ||||
|  | #after us won't split it into two words. | ||||
|  | 
 | ||||
|  | # Synonym mappings can be used for spelling correction too | ||||
|  | pixima => pixma | ||||
|  | 
 | ||||
					Loading…
					
					
				
		Reference in new issue