mirror of https://github.com/interlegis/sapl.git
Edward
6 years ago
committed by
GitHub
20 changed files with 2797 additions and 55 deletions
@ -0,0 +1,61 @@ |
|||||
|
version: '2' |
||||
|
services: |
||||
|
sapldb: |
||||
|
image: postgres:10.5-alpine |
||||
|
restart: always |
||||
|
environment: |
||||
|
POSTGRES_PASSWORD: sapl |
||||
|
POSTGRES_USER: sapl |
||||
|
POSTGRES_DB: sapl |
||||
|
PGDATA : /var/lib/postgresql/data/ |
||||
|
volumes: |
||||
|
- sapldb_data:/var/lib/postgresql/data/ |
||||
|
ports: |
||||
|
- "5432:5432" |
||||
|
|
||||
|
saplsolr: |
||||
|
image: solr:7.4-alpine |
||||
|
restart: always |
||||
|
command: bin/solr start -c -f |
||||
|
volumes: |
||||
|
- solr_data:/opt/solr/server/solr |
||||
|
- solr_configsets:/opt/solr/server/solr/configsets |
||||
|
ports: |
||||
|
- "8983:8983" |
||||
|
|
||||
|
sapl: |
||||
|
image: interlegis/sapl:3.1.138 |
||||
|
# build: . |
||||
|
restart: always |
||||
|
environment: |
||||
|
ADMIN_PASSWORD: interlegis |
||||
|
ADMIN_EMAIL: email@dominio.net |
||||
|
DEBUG: 'False' |
||||
|
USE_TLS: 'False' |
||||
|
EMAIL_PORT: 587 |
||||
|
EMAIL_HOST: smtp.dominio.net |
||||
|
EMAIL_HOST_USER: usuariosmtp |
||||
|
EMAIL_HOST_PASSWORD: senhasmtp |
||||
|
USE_SOLR: 'True' |
||||
|
#SOLR_COLLECTION: sapl |
||||
|
#SOLR_HOST: saplsolr |
||||
|
SOLR_URL: http://saplsolr:8983/solr/sapl |
||||
|
TZ: America/Sao_Paulo |
||||
|
volumes: |
||||
|
- sapl_data:/var/interlegis/sapl/data |
||||
|
- sapl_media:/var/interlegis/sapl/media |
||||
|
- sapl_root:/var/interlegis/sapl |
||||
|
volumes_from: |
||||
|
- saplsolr |
||||
|
depends_on: |
||||
|
- sapldb |
||||
|
- saplsolr |
||||
|
ports: |
||||
|
- "80:80" |
||||
|
volumes: |
||||
|
sapldb_data: |
||||
|
sapl_data: |
||||
|
sapl_media: |
||||
|
sapl_root: |
||||
|
solr_data: |
||||
|
solr_configsets: |
@ -0,0 +1,54 @@ |
|||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more |
||||
|
# contributor license agreements. See the NOTICE file distributed with |
||||
|
# this work for additional information regarding copyright ownership. |
||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
|
# (the "License"); you may not use this file except in compliance with |
||||
|
# the License. You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
# a couple of test stopwords to test that the words are really being |
||||
|
# configured from this file: |
||||
|
stopworda |
||||
|
stopwordb |
||||
|
|
||||
|
# Standard english stop words taken from Lucene's StopAnalyzer |
||||
|
a |
||||
|
an |
||||
|
and |
||||
|
are |
||||
|
as |
||||
|
at |
||||
|
be |
||||
|
but |
||||
|
by |
||||
|
for |
||||
|
if |
||||
|
in |
||||
|
into |
||||
|
is |
||||
|
it |
||||
|
no |
||||
|
not |
||||
|
of |
||||
|
on |
||||
|
or |
||||
|
such |
||||
|
that |
||||
|
the |
||||
|
their |
||||
|
then |
||||
|
there |
||||
|
these |
||||
|
they |
||||
|
this |
||||
|
to |
||||
|
was |
||||
|
will |
||||
|
with |
@ -0,0 +1,253 @@ |
|||||
|
| From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt |
||||
|
| This file is distributed under the BSD License. |
||||
|
| See http://snowball.tartarus.org/license.php |
||||
|
| Also see http://www.opensource.org/licenses/bsd-license.html |
||||
|
| - Encoding was converted to UTF-8. |
||||
|
| - This notice was added. |
||||
|
| |
||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" |
||||
|
|
||||
|
| A Portuguese stop word list. Comments begin with vertical bar. Each stop |
||||
|
| word is at the start of a line. |
||||
|
|
||||
|
|
||||
|
| The following is a ranked list (commonest to rarest) of stopwords |
||||
|
| deriving from a large sample of text. |
||||
|
|
||||
|
| Extra words have been added at the end. |
||||
|
|
||||
|
de | of, from |
||||
|
a | the; to, at; her |
||||
|
o | the; him |
||||
|
que | who, that |
||||
|
e | and |
||||
|
do | de + o |
||||
|
da | de + a |
||||
|
em | in |
||||
|
um | a |
||||
|
para | for |
||||
|
| é from SER |
||||
|
com | with |
||||
|
não | not, no |
||||
|
uma | a |
||||
|
os | the; them |
||||
|
no | em + o |
||||
|
se | himself etc |
||||
|
na | em + a |
||||
|
por | for |
||||
|
mais | more |
||||
|
as | the; them |
||||
|
dos | de + os |
||||
|
como | as, like |
||||
|
mas | but |
||||
|
| foi from SER |
||||
|
ao | a + o |
||||
|
ele | he |
||||
|
das | de + as |
||||
|
| tem from TER |
||||
|
à | a + a |
||||
|
seu | his |
||||
|
sua | her |
||||
|
ou | or |
||||
|
| ser from SER |
||||
|
quando | when |
||||
|
muito | much |
||||
|
| há from HAV |
||||
|
nos | em + os; us |
||||
|
já | already, now |
||||
|
| está from EST |
||||
|
eu | I |
||||
|
também | also |
||||
|
só | only, just |
||||
|
pelo | per + o |
||||
|
pela | per + a |
||||
|
até | up to |
||||
|
isso | that |
||||
|
ela | he |
||||
|
entre | between |
||||
|
| era from SER |
||||
|
depois | after |
||||
|
sem | without |
||||
|
mesmo | same |
||||
|
aos | a + os |
||||
|
| ter from TER |
||||
|
seus | his |
||||
|
quem | whom |
||||
|
nas | em + as |
||||
|
me | me |
||||
|
esse | that |
||||
|
eles | they |
||||
|
| estão from EST |
||||
|
você | you |
||||
|
| tinha from TER |
||||
|
| foram from SER |
||||
|
essa | that |
||||
|
num | em + um |
||||
|
nem | nor |
||||
|
suas | her |
||||
|
meu | my |
||||
|
às | a + as |
||||
|
minha | my |
||||
|
| têm from TER |
||||
|
numa | em + uma |
||||
|
pelos | per + os |
||||
|
elas | they |
||||
|
| havia from HAV |
||||
|
| seja from SER |
||||
|
qual | which |
||||
|
| será from SER |
||||
|
nós | we |
||||
|
| tenho from TER |
||||
|
lhe | to him, her |
||||
|
deles | of them |
||||
|
essas | those |
||||
|
esses | those |
||||
|
pelas | per + as |
||||
|
este | this |
||||
|
| fosse from SER |
||||
|
dele | of him |
||||
|
|
||||
|
| other words. There are many contractions such as naquele = em+aquele, |
||||
|
| mo = me+o, but they are rare. |
||||
|
| Indefinite article plural forms are also rare. |
||||
|
|
||||
|
tu | thou |
||||
|
te | thee |
||||
|
vocês | you (plural) |
||||
|
vos | you |
||||
|
lhes | to them |
||||
|
meus | my |
||||
|
minhas |
||||
|
teu | thy |
||||
|
tua |
||||
|
teus |
||||
|
tuas |
||||
|
nosso | our |
||||
|
nossa |
||||
|
nossos |
||||
|
nossas |
||||
|
|
||||
|
dela | of her |
||||
|
delas | of them |
||||
|
|
||||
|
esta | this |
||||
|
estes | these |
||||
|
estas | these |
||||
|
aquele | that |
||||
|
aquela | that |
||||
|
aqueles | those |
||||
|
aquelas | those |
||||
|
isto | this |
||||
|
aquilo | that |
||||
|
|
||||
|
| forms of estar, to be (not including the infinitive): |
||||
|
estou |
||||
|
está |
||||
|
estamos |
||||
|
estão |
||||
|
estive |
||||
|
esteve |
||||
|
estivemos |
||||
|
estiveram |
||||
|
estava |
||||
|
estávamos |
||||
|
estavam |
||||
|
estivera |
||||
|
estivéramos |
||||
|
esteja |
||||
|
estejamos |
||||
|
estejam |
||||
|
estivesse |
||||
|
estivéssemos |
||||
|
estivessem |
||||
|
estiver |
||||
|
estivermos |
||||
|
estiverem |
||||
|
|
||||
|
| forms of haver, to have (not including the infinitive): |
||||
|
hei |
||||
|
há |
||||
|
havemos |
||||
|
hão |
||||
|
houve |
||||
|
houvemos |
||||
|
houveram |
||||
|
houvera |
||||
|
houvéramos |
||||
|
haja |
||||
|
hajamos |
||||
|
hajam |
||||
|
houvesse |
||||
|
houvéssemos |
||||
|
houvessem |
||||
|
houver |
||||
|
houvermos |
||||
|
houverem |
||||
|
houverei |
||||
|
houverá |
||||
|
houveremos |
||||
|
houverão |
||||
|
houveria |
||||
|
houveríamos |
||||
|
houveriam |
||||
|
|
||||
|
| forms of ser, to be (not including the infinitive): |
||||
|
sou |
||||
|
somos |
||||
|
são |
||||
|
era |
||||
|
éramos |
||||
|
eram |
||||
|
fui |
||||
|
foi |
||||
|
fomos |
||||
|
foram |
||||
|
fora |
||||
|
fôramos |
||||
|
seja |
||||
|
sejamos |
||||
|
sejam |
||||
|
fosse |
||||
|
fôssemos |
||||
|
fossem |
||||
|
for |
||||
|
formos |
||||
|
forem |
||||
|
serei |
||||
|
será |
||||
|
seremos |
||||
|
serão |
||||
|
seria |
||||
|
seríamos |
||||
|
seriam |
||||
|
|
||||
|
| forms of ter, to have (not including the infinitive): |
||||
|
tenho |
||||
|
tem |
||||
|
temos |
||||
|
tém |
||||
|
tinha |
||||
|
tínhamos |
||||
|
tinham |
||||
|
tive |
||||
|
teve |
||||
|
tivemos |
||||
|
tiveram |
||||
|
tivera |
||||
|
tivéramos |
||||
|
tenha |
||||
|
tenhamos |
||||
|
tenham |
||||
|
tivesse |
||||
|
tivéssemos |
||||
|
tivessem |
||||
|
tiver |
||||
|
tivermos |
||||
|
tiverem |
||||
|
terei |
||||
|
terá |
||||
|
teremos |
||||
|
terão |
||||
|
teria |
||||
|
teríamos |
||||
|
teriam |
@ -0,0 +1,573 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8" ?> |
||||
|
<!-- |
||||
|
Licensed to the Apache Software Foundation (ASF) under one or more |
||||
|
contributor license agreements. See the NOTICE file distributed with |
||||
|
this work for additional information regarding copyright ownership. |
||||
|
The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
|
(the "License"); you may not use this file except in compliance with |
||||
|
the License. You may obtain a copy of the License at |
||||
|
|
||||
|
http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
|
||||
|
Unless required by applicable law or agreed to in writing, software |
||||
|
distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
See the License for the specific language governing permissions and |
||||
|
limitations under the License. |
||||
|
--> |
||||
|
|
||||
|
<!-- |
||||
|
|
||||
|
This example schema is the recommended starting point for users. |
||||
|
It should be kept correct and concise, usable out-of-the-box. |
||||
|
|
||||
|
|
||||
|
For more information, on how to customize this file, please see |
||||
|
http://lucene.apache.org/solr/guide/documents-fields-and-schema-design.html |
||||
|
|
||||
|
PERFORMANCE NOTE: this schema includes many optional features and should not |
||||
|
be used for benchmarking. To improve performance one could |
||||
|
- set stored="false" for all fields possible (esp large fields) when you |
||||
|
only need to search on the field but don't need to return the original |
||||
|
value. |
||||
|
- set indexed="false" if you don't need to search on the field, but only |
||||
|
return the field as a result of searching on other indexed fields. |
||||
|
- remove all unneeded copyField statements |
||||
|
- for best index size and searching performance, set "index" to false |
||||
|
for all general text fields, use copyField to copy them to the |
||||
|
catchall "text" field, and use that for searching. |
||||
|
--> |
||||
|
|
||||
|
<schema name="default-config" version="1.6"> |
||||
|
<!-- attribute "name" is the name of this schema and is only used for display purposes. |
||||
|
version="x.y" is Solr's version number for the schema syntax and |
||||
|
semantics. It should not normally be changed by applications. |
||||
|
|
||||
|
1.0: multiValued attribute did not exist, all fields are multiValued |
||||
|
by nature |
||||
|
1.1: multiValued attribute introduced, false by default |
||||
|
1.2: omitTermFreqAndPositions attribute introduced, true by default |
||||
|
except for text fields. |
||||
|
1.3: removed optional field compress feature |
||||
|
1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser |
||||
|
behavior when a single string produces multiple tokens. Defaults |
||||
|
to off for version >= 1.4 |
||||
|
1.5: omitNorms defaults to true for primitive field types |
||||
|
(int, float, boolean, string...) |
||||
|
1.6: useDocValuesAsStored defaults to true. |
||||
|
--> |
||||
|
|
||||
|
<!-- Valid attributes for fields: |
||||
|
name: mandatory - the name for the field |
||||
|
type: mandatory - the name of a field type from the |
||||
|
fieldTypes section |
||||
|
indexed: true if this field should be indexed (searchable or sortable) |
||||
|
stored: true if this field should be retrievable |
||||
|
docValues: true if this field should have doc values. Doc Values is |
||||
|
recommended (required, if you are using *Point fields) for faceting, |
||||
|
grouping, sorting and function queries. Doc Values will make the index |
||||
|
faster to load, more NRT-friendly and more memory-efficient. |
||||
|
They are currently only supported by StrField, UUIDField, all |
||||
|
*PointFields, and depending on the field type, they might require |
||||
|
the field to be single-valued, be required or have a default value |
||||
|
(check the documentation of the field type you're interested in for |
||||
|
more information) |
||||
|
multiValued: true if this field may contain multiple values per document |
||||
|
omitNorms: (expert) set to true to omit the norms associated with |
||||
|
this field (this disables length normalization and index-time |
||||
|
boosting for the field, and saves some memory). Only full-text |
||||
|
fields or fields that need an index-time boost need norms. |
||||
|
Norms are omitted for primitive (non-analyzed) types by default. |
||||
|
termVectors: [false] set to true to store the term vector for a |
||||
|
given field. |
||||
|
When using MoreLikeThis, fields used for similarity should be |
||||
|
stored for best performance. |
||||
|
termPositions: Store position information with the term vector. |
||||
|
This will increase storage costs. |
||||
|
termOffsets: Store offset information with the term vector. This |
||||
|
will increase storage costs. |
||||
|
required: The field is required. It will throw an error if the |
||||
|
value does not exist |
||||
|
default: a value that should be used if no value is specified |
||||
|
when adding a document. |
||||
|
--> |
||||
|
|
||||
|
<!-- field names should consist of alphanumeric or underscore characters only and |
||||
|
not start with a digit. This is not currently strictly enforced, |
||||
|
but other field names will not have first class support from all components |
||||
|
and back compatibility is not guaranteed. Names with both leading and |
||||
|
trailing underscores (e.g. _version_) are reserved. |
||||
|
--> |
||||
|
|
||||
|
<!-- In this _default configset, only four fields are pre-declared: |
||||
|
id, _version_, and _text_ and _root_. All other fields will be type guessed and added via the |
||||
|
"add-unknown-fields-to-the-schema" update request processor chain declared in solrconfig.xml. |
||||
|
|
||||
|
Note that many dynamic fields are also defined - you can use them to specify a |
||||
|
field's type via field naming conventions - see below. |
||||
|
|
||||
|
WARNING: The _text_ catch-all field will significantly increase your index size. |
||||
|
If you don't need it, consider removing it and the corresponding copyField directive. |
||||
|
--> |
||||
|
|
||||
|
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /> |
||||
|
<!-- docValues are enabled by default for long type so we don't need to index the version field --> |
||||
|
<field name="_version_" type="plong" indexed="false" stored="false"/> |
||||
|
<field name="_root_" type="string" indexed="true" stored="false" docValues="false" /> |
||||
|
<field name="_text_" type="text_general" indexed="true" stored="false" multiValued="true"/> |
||||
|
|
||||
|
<!-- Django fields --> |
||||
|
<field name="django_ct" type="string" indexed="true" stored="true" multiValued="false"/> |
||||
|
<field name="django_id" type="string" indexed="true" stored="true" multiValued="false"/> |
||||
|
<field name="text" type="text_pt" indexed="true" stored="true" multiValued="false" /> |
||||
|
|
||||
|
<!-- This can be enabled, in case the client does not know what fields may be searched. It isn't enabled by default |
||||
|
because it's very expensive to index everything twice. --> |
||||
|
<!-- <copyField source="*" dest="_text_"/> --> |
||||
|
|
||||
|
<!-- Dynamic field definitions allow using convention over configuration |
||||
|
for fields via the specification of patterns to match field names. |
||||
|
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i) |
||||
|
RESTRICTION: the glob-like pattern in the name attribute must have a "*" only at the start or the end. --> |
||||
|
|
||||
|
<dynamicField name="*_i" type="pint" indexed="true" stored="true"/> |
||||
|
<dynamicField name="*_is" type="pints" indexed="true" stored="true"/> |
||||
|
<dynamicField name="*_s" type="string" indexed="true" stored="true" /> |
||||
|
<dynamicField name="*_ss" type="strings" indexed="true" stored="true"/> |
||||
|
<dynamicField name="*_l" type="plong" indexed="true" stored="true"/> |
||||
|
<dynamicField name="*_ls" type="plongs" indexed="true" stored="true"/> |
||||
|
<dynamicField name="*_t" type="text_general" indexed="true" stored="true" multiValued="false"/> |
||||
|
<dynamicField name="*_txt" type="text_general" indexed="true" stored="true"/> |
||||
|
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/> |
||||
|
<dynamicField name="*_bs" type="booleans" indexed="true" stored="true"/> |
||||
|
<dynamicField name="*_f" type="pfloat" indexed="true" stored="true"/> |
||||
|
<dynamicField name="*_fs" type="pfloats" indexed="true" stored="true"/> |
||||
|
<dynamicField name="*_d" type="pdouble" indexed="true" stored="true"/> |
||||
|
<dynamicField name="*_ds" type="pdoubles" indexed="true" stored="true"/> |
||||
|
|
||||
|
<!-- Type used for data-driven schema, to add a string copy for each text field --> |
||||
|
<dynamicField name="*_str" type="strings" stored="false" docValues="true" indexed="false" /> |
||||
|
|
||||
|
<dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/> |
||||
|
<dynamicField name="*_dts" type="pdate" indexed="true" stored="true" multiValued="true"/> |
||||
|
<dynamicField name="*_p" type="location" indexed="true" stored="true"/> |
||||
|
<dynamicField name="*_srpt" type="location_rpt" indexed="true" stored="true"/> |
||||
|
|
||||
|
<!-- payloaded dynamic fields --> |
||||
|
<dynamicField name="*_dpf" type="delimited_payloads_float" indexed="true" stored="true"/> |
||||
|
<dynamicField name="*_dpi" type="delimited_payloads_int" indexed="true" stored="true"/> |
||||
|
<dynamicField name="*_dps" type="delimited_payloads_string" indexed="true" stored="true"/> |
||||
|
|
||||
|
<dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/> |
||||
|
|
||||
|
<!-- Field to use to determine and enforce document uniqueness. |
||||
|
Unless this field is marked with required="false", it will be a required field |
||||
|
--> |
||||
|
<uniqueKey>id</uniqueKey> |
||||
|
|
||||
|
<!-- copyField commands copy one field to another at the time a document |
||||
|
is added to the index. It's used either to index the same field differently, |
||||
|
or to add multiple fields to the same field for easier/faster searching. |
||||
|
|
||||
|
<copyField source="sourceFieldName" dest="destinationFieldName"/> |
||||
|
--> |
||||
|
|
||||
|
<!-- field type definitions. The "name" attribute is |
||||
|
just a label to be used by field definitions. The "class" |
||||
|
attribute and any other attributes determine the real |
||||
|
behavior of the fieldType. |
||||
|
Class names starting with "solr" refer to java classes in a |
||||
|
standard package such as org.apache.solr.analysis |
||||
|
--> |
||||
|
|
||||
|
<!-- sortMissingLast and sortMissingFirst attributes are optional attributes are |
||||
|
currently supported on types that are sorted internally as strings |
||||
|
and on numeric types. |
||||
|
This includes "string", "boolean", "pint", "pfloat", "plong", "pdate", "pdouble". |
||||
|
- If sortMissingLast="true", then a sort on this field will cause documents |
||||
|
without the field to come after documents with the field, |
||||
|
regardless of the requested sort order (asc or desc). |
||||
|
- If sortMissingFirst="true", then a sort on this field will cause documents |
||||
|
without the field to come before documents with the field, |
||||
|
regardless of the requested sort order. |
||||
|
- If sortMissingLast="false" and sortMissingFirst="false" (the default), |
||||
|
then default lucene sorting will be used which places docs without the |
||||
|
field first in an ascending sort and last in a descending sort. |
||||
|
--> |
||||
|
|
||||
|
<!-- The StrField type is not analyzed, but indexed/stored verbatim. --> |
||||
|
<fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true" /> |
||||
|
<fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" /> |
||||
|
|
||||
|
<!-- boolean type: "true" or "false" --> |
||||
|
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> |
||||
|
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/> |
||||
|
|
||||
|
<!-- |
||||
|
Numeric field types that index values using KD-trees. |
||||
|
Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc. |
||||
|
--> |
||||
|
<fieldType name="pint" class="solr.IntPointField" docValues="true"/> |
||||
|
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/> |
||||
|
<fieldType name="plong" class="solr.LongPointField" docValues="true"/> |
||||
|
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/> |
||||
|
|
||||
|
<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/> |
||||
|
<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/> |
||||
|
<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/> |
||||
|
<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/> |
||||
|
|
||||
|
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and |
||||
|
is a more restricted form of the canonical representation of dateTime |
||||
|
http://www.w3.org/TR/xmlschema-2/#dateTime |
||||
|
The trailing "Z" designates UTC time and is mandatory. |
||||
|
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z |
||||
|
All other components are mandatory. |
||||
|
|
||||
|
Expressions can also be used to denote calculations that should be |
||||
|
performed relative to "NOW" to determine the value, ie... |
||||
|
|
||||
|
NOW/HOUR |
||||
|
... Round to the start of the current hour |
||||
|
NOW-1DAY |
||||
|
... Exactly 1 day prior to now |
||||
|
NOW/DAY+6MONTHS+3DAYS |
||||
|
... 6 months and 3 days in the future from the start of |
||||
|
the current day |
||||
|
|
||||
|
--> |
||||
|
<!-- KD-tree versions of date fields --> |
||||
|
<fieldType name="pdate" class="solr.DatePointField" docValues="true"/> |
||||
|
<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/> |
||||
|
|
||||
|
<!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings --> |
||||
|
<fieldType name="binary" class="solr.BinaryField"/> |
||||
|
|
||||
|
<!-- solr.TextField allows the specification of custom text analyzers |
||||
|
specified as a tokenizer and a list of token filters. Different |
||||
|
analyzers may be specified for indexing and querying. |
||||
|
|
||||
|
The optional positionIncrementGap puts space between multiple fields of |
||||
|
this type on the same document, with the purpose of preventing false phrase |
||||
|
matching across fields. |
||||
|
|
||||
|
For more info on customizing your analyzer chain, please see |
||||
|
http://lucene.apache.org/solr/guide/understanding-analyzers-tokenizers-and-filters.html#understanding-analyzers-tokenizers-and-filters |
||||
|
--> |
||||
|
|
||||
|
<!-- One can also specify an existing Analyzer class that has a |
||||
|
default constructor via the class attribute on the analyzer element. |
||||
|
Example: |
||||
|
<fieldType name="text_greek" class="solr.TextField"> |
||||
|
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/> |
||||
|
</fieldType> |
||||
|
--> |
||||
|
|
||||
|
<!-- A text field that only splits on whitespace for exact matching of words --> |
||||
|
<dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/> |
||||
|
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> |
||||
|
<analyzer> |
||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<!-- A general text field that has reasonable, generic |
||||
|
cross-language defaults: it tokenizes with StandardTokenizer, |
||||
|
removes stop words from case-insensitive "stopwords.txt" |
||||
|
(empty by default), and down cases. At query time only, it |
||||
|
also applies synonyms. |
||||
|
--> |
||||
|
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true"> |
||||
|
<analyzer type="index"> |
||||
|
<tokenizer class="solr.StandardTokenizerFactory"/> |
||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
||||
|
<!-- in this example, we will only use synonyms at query time |
||||
|
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
||||
|
<filter class="solr.FlattenGraphFilterFactory"/> |
||||
|
--> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
</analyzer> |
||||
|
<analyzer type="query"> |
||||
|
<tokenizer class="solr.StandardTokenizerFactory"/> |
||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
||||
|
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
|
||||
|
<!-- SortableTextField generaly functions exactly like TextField, |
||||
|
except that it supports, and by default uses, docValues for sorting (or faceting) |
||||
|
on the first 1024 characters of the original field values (which is configurable). |
||||
|
|
||||
|
This makes it a bit more useful then TextField in many situations, but the trade-off |
||||
|
is that it takes up more space on disk; which is why it's not used in place of TextField |
||||
|
for every fieldType in this _default schema. |
||||
|
--> |
||||
|
<dynamicField name="*_t_sort" type="text_gen_sort" indexed="true" stored="true" multiValued="false"/> |
||||
|
<dynamicField name="*_txt_sort" type="text_gen_sort" indexed="true" stored="true"/> |
||||
|
<fieldType name="text_gen_sort" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true"> |
||||
|
<analyzer type="index"> |
||||
|
<tokenizer class="solr.StandardTokenizerFactory"/> |
||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
</analyzer> |
||||
|
<analyzer type="query"> |
||||
|
<tokenizer class="solr.StandardTokenizerFactory"/> |
||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
||||
|
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<!-- A text field with defaults appropriate for English: it tokenizes with StandardTokenizer, |
||||
|
removes English stop words (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and |
||||
|
finally applies Porter's stemming. The query time analyzer also applies synonyms from synonyms.txt. --> |
||||
|
<dynamicField name="*_txt_en" type="text_en" indexed="true" stored="true"/> |
||||
|
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> |
||||
|
<analyzer type="index"> |
||||
|
<tokenizer class="solr.StandardTokenizerFactory"/> |
||||
|
<!-- in this example, we will only use synonyms at query time |
||||
|
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
||||
|
<filter class="solr.FlattenGraphFilterFactory"/> |
||||
|
--> |
||||
|
<!-- Case insensitive stop word removal. |
||||
|
--> |
||||
|
<filter class="solr.StopFilterFactory" |
||||
|
ignoreCase="true" |
||||
|
words="lang/stopwords_en.txt" |
||||
|
/> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
<filter class="solr.EnglishPossessiveFilterFactory"/> |
||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
||||
|
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: |
||||
|
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
||||
|
--> |
||||
|
<filter class="solr.PorterStemFilterFactory"/> |
||||
|
</analyzer> |
||||
|
<analyzer type="query"> |
||||
|
<tokenizer class="solr.StandardTokenizerFactory"/> |
||||
|
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
||||
|
<filter class="solr.StopFilterFactory" |
||||
|
ignoreCase="true" |
||||
|
words="lang/stopwords_en.txt" |
||||
|
/> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
<filter class="solr.EnglishPossessiveFilterFactory"/> |
||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
||||
|
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: |
||||
|
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
||||
|
--> |
||||
|
<filter class="solr.PorterStemFilterFactory"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<!-- A text field with defaults appropriate for English, plus |
||||
|
aggressive word-splitting and autophrase features enabled. |
||||
|
This field is just like text_en, except it adds |
||||
|
WordDelimiterGraphFilter to enable splitting and matching of |
||||
|
words on case-change, alpha numeric boundaries, and |
||||
|
non-alphanumeric chars. This means certain compound word |
||||
|
cases will work, for example query "wi fi" will match |
||||
|
document "WiFi" or "wi-fi". |
||||
|
--> |
||||
|
<dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/> |
||||
|
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> |
||||
|
<analyzer type="index"> |
||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
||||
|
<!-- in this example, we will only use synonyms at query time |
||||
|
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
||||
|
--> |
||||
|
<!-- Case insensitive stop word removal. |
||||
|
--> |
||||
|
<filter class="solr.StopFilterFactory" |
||||
|
ignoreCase="true" |
||||
|
words="lang/stopwords_en.txt" |
||||
|
/> |
||||
|
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
||||
|
<filter class="solr.PorterStemFilterFactory"/> |
||||
|
<filter class="solr.FlattenGraphFilterFactory" /> |
||||
|
</analyzer> |
||||
|
<analyzer type="query"> |
||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
||||
|
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
||||
|
<filter class="solr.StopFilterFactory" |
||||
|
ignoreCase="true" |
||||
|
words="lang/stopwords_en.txt" |
||||
|
/> |
||||
|
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
||||
|
<filter class="solr.PorterStemFilterFactory"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<!-- Less flexible matching, but less false matches. Probably not ideal for product names, |
||||
|
but may be good for SKUs. Can insert dashes in the wrong place and still match. --> |
||||
|
<dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/> |
||||
|
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> |
||||
|
<analyzer type="index"> |
||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
||||
|
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> |
||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> |
||||
|
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
||||
|
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
||||
|
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes |
||||
|
possible with WordDelimiterGraphFilter in conjunction with stemming. -->
||||
|
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> |
||||
|
<filter class="solr.FlattenGraphFilterFactory" /> |
||||
|
</analyzer> |
||||
|
<analyzer type="query"> |
||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
||||
|
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> |
||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> |
||||
|
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
||||
|
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
||||
|
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes |
||||
|
possible with WordDelimiterGraphFilter in conjunction with stemming. -->
||||
|
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<!-- Just like text_general except it reverses the characters of |
||||
|
each token, to enable more efficient leading wildcard queries. |
||||
|
--> |
||||
|
<dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/> |
||||
|
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> |
||||
|
<analyzer type="index"> |
||||
|
<tokenizer class="solr.StandardTokenizerFactory"/> |
||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" |
||||
|
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> |
||||
|
</analyzer> |
||||
|
<analyzer type="query"> |
||||
|
<tokenizer class="solr.StandardTokenizerFactory"/> |
||||
|
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/> |
||||
|
<fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" > |
||||
|
<analyzer> |
||||
|
<tokenizer class="solr.StandardTokenizerFactory"/> |
||||
|
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<!-- lowercases the entire field value, keeping it as a single token. --> |
||||
|
<dynamicField name="*_s_lower" type="lowercase" indexed="true" stored="true"/> |
||||
|
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> |
||||
|
<analyzer> |
||||
|
<tokenizer class="solr.KeywordTokenizerFactory"/> |
||||
|
<filter class="solr.LowerCaseFilterFactory" /> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<!-- |
||||
|
Example of using PathHierarchyTokenizerFactory at index time, so |
||||
|
queries for paths match documents at that path, or in descendent paths |
||||
|
--> |
||||
|
<dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/> |
||||
|
<fieldType name="descendent_path" class="solr.TextField"> |
||||
|
<analyzer type="index"> |
||||
|
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> |
||||
|
</analyzer> |
||||
|
<analyzer type="query"> |
||||
|
<tokenizer class="solr.KeywordTokenizerFactory" /> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<!-- |
||||
|
Example of using PathHierarchyTokenizerFactory at query time, so |
||||
|
queries for paths match documents at that path, or in ancestor paths |
||||
|
--> |
||||
|
<dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/> |
||||
|
<fieldType name="ancestor_path" class="solr.TextField"> |
||||
|
<analyzer type="index"> |
||||
|
<tokenizer class="solr.KeywordTokenizerFactory" /> |
||||
|
</analyzer> |
||||
|
<analyzer type="query"> |
||||
|
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<!-- This point type indexes the coordinates as separate fields (subFields) |
||||
|
If subFieldType is defined, it references a type, and a dynamic field |
||||
|
definition is created matching *___<typename>. Alternately, if |
||||
|
subFieldSuffix is defined, that is used to create the subFields. |
||||
|
Example: if subFieldType="double", then the coordinates would be |
||||
|
indexed in fields myloc_0___double,myloc_1___double. |
||||
|
Example: if subFieldSuffix="_d" then the coordinates would be indexed |
||||
|
in fields myloc_0_d,myloc_1_d |
||||
|
The subFields are an implementation detail of the fieldType, and end |
||||
|
users normally should not need to know about them. |
||||
|
--> |
||||
|
<dynamicField name="*_point" type="point" indexed="true" stored="true"/> |
||||
|
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/> |
||||
|
|
||||
|
<!-- A specialized field for geospatial search filters and distance sorting. --> |
||||
|
<fieldType name="location" class="solr.LatLonPointSpatialField" docValues="true"/> |
||||
|
|
||||
|
<!-- A geospatial field type that supports multiValued and polygon shapes. |
||||
|
For more information about this and other spatial fields see: |
||||
|
http://lucene.apache.org/solr/guide/spatial-search.html |
||||
|
--> |
||||
|
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" |
||||
|
geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" /> |
||||
|
|
||||
|
<!-- Payloaded field types --> |
||||
|
<fieldType name="delimited_payloads_float" stored="false" indexed="true" class="solr.TextField"> |
||||
|
<analyzer> |
||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
||||
|
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
<fieldType name="delimited_payloads_int" stored="false" indexed="true" class="solr.TextField"> |
||||
|
<analyzer> |
||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
||||
|
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="integer"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
<fieldType name="delimited_payloads_string" stored="false" indexed="true" class="solr.TextField"> |
||||
|
<analyzer> |
||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
||||
|
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="identity"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<!-- Portuguese --> |
||||
|
<dynamicField name="*_txt_pt" type="text_pt" indexed="true" stored="true"/> |
||||
|
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> |
||||
|
<analyzer> |
||||
|
<charFilter class="solr.HTMLStripCharFilterFactory"/> |
||||
|
<tokenizer class="solr.StandardTokenizerFactory"/> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" /> |
||||
|
<filter class="solr.PortugueseLightStemFilterFactory"/> |
||||
|
<!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> |
||||
|
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> |
||||
|
<!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<!-- Similarity is the scoring routine for each document vs. a query. |
||||
|
A custom Similarity or SimilarityFactory may be specified here, but |
||||
|
the default is fine for most applications. |
||||
|
For more info: http://lucene.apache.org/solr/guide/other-schema-elements.html#OtherSchemaElements-Similarity |
||||
|
--> |
||||
|
<!-- |
||||
|
<similarity class="com.example.solr.CustomSimilarityFactory"> |
||||
|
<str name="paramkey">param value</str> |
||||
|
</similarity> |
||||
|
--> |
||||
|
|
||||
|
</schema> |
@ -0,0 +1,20 @@ |
|||||
|
{"params":{ |
||||
|
"query":{ |
||||
|
"defType":"edismax", |
||||
|
"q.alt":"*:*", |
||||
|
"rows":"10", |
||||
|
"fl":"*,score", |
||||
|
"":{"v":0} |
||||
|
}, |
||||
|
"facets":{ |
||||
|
"facet":"on", |
||||
|
"facet.mincount": "1", |
||||
|
"":{"v":0} |
||||
|
}, |
||||
|
"velocity":{ |
||||
|
"wt": "velocity", |
||||
|
"v.template":"browse", |
||||
|
"v.layout": "layout", |
||||
|
"":{"v":0} |
||||
|
} |
||||
|
}} |
@ -0,0 +1,21 @@ |
|||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
|
# (the "License"); you may not use this file except in compliance with |
||||
|
# the License. You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
#----------------------------------------------------------------------- |
||||
|
# Use a protected word file to protect against the stemmer reducing two |
||||
|
# unrelated words to the same base word. |
||||
|
|
||||
|
# Some non-words that normally won't be encountered, |
||||
|
# just to test that they won't be stemmed. |
||||
|
dontstems |
||||
|
zwhacky |
||||
|
|
Binary file not shown.
@ -0,0 +1,165 @@ |
|||||
|
<?xml version="1.0" ?> |
||||
|
<!-- |
||||
|
Licensed to the Apache Software Foundation (ASF) under one or more |
||||
|
contributor license agreements. See the NOTICE file distributed with |
||||
|
this work for additional information regarding copyright ownership. |
||||
|
The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
|
(the "License"); you may not use this file except in compliance with |
||||
|
the License. You may obtain a copy of the License at |
||||
|
|
||||
|
http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
|
||||
|
Unless required by applicable law or agreed to in writing, software |
||||
|
distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
See the License for the specific language governing permissions and |
||||
|
limitations under the License. |
||||
|
--> |
||||
|
|
||||
|
<schema name="default" version="1.6"> |
||||
|
<types> |
||||
|
<fieldtype name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/> |
||||
|
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/> |
||||
|
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/> |
||||
|
<fieldtype name="binary" class="solr.BinaryField"/> |
||||
|
|
||||
|
<!-- Numeric field types that manipulate the value into |
||||
|
a string value that isn't human-readable in its internal form, |
||||
|
but with a lexicographic ordering the same as the numeric ordering, |
||||
|
so that range queries work correctly. --> |
||||
|
<fieldType name="pint" class="solr.IntPointField" docValues="true" /> |
||||
|
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true" /> |
||||
|
<fieldType name="plong" class="solr.LongPointField" docValues="true" /> |
||||
|
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/> |
||||
|
|
||||
|
|
||||
|
<fieldType name="pdate" class="solr.DatePointField" docValues="true" /> |
||||
|
<!-- A Trie based date field for faster date range queries and date faceting. -->
||||
|
|
||||
|
<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/> |
||||
|
<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/> |
||||
|
<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/> |
||||
|
<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/> |
||||
|
<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/> |
||||
|
|
||||
|
|
||||
|
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/> |
||||
|
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/> |
||||
|
<fieldtype name="geohash" class="solr.GeoHashField"/> |
||||
|
|
||||
|
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> |
||||
|
<analyzer type="index"> |
||||
|
<tokenizer class="solr.StandardTokenizerFactory"/> |
||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
||||
|
<!-- in this example, we will only use synonyms at query time |
||||
|
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
||||
|
--> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
</analyzer> |
||||
|
<analyzer type="query"> |
||||
|
<tokenizer class="solr.StandardTokenizerFactory"/> |
||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<!-- Portuguese --> |
||||
|
<dynamicField name="*_txt_pt" type="text_pt" indexed="true" stored="true"/> |
||||
|
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> |
||||
|
<analyzer> |
||||
|
<tokenizer class="solr.StandardTokenizerFactory"/> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" /> |
||||
|
<filter class="solr.PortugueseLightStemFilterFactory"/> |
||||
|
<!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> |
||||
|
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> |
||||
|
<!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
|
||||
|
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> |
||||
|
<analyzer type="index"> |
||||
|
<tokenizer class="solr.StandardTokenizerFactory"/> |
||||
|
<filter class="solr.StopFilterFactory" |
||||
|
ignoreCase="true" |
||||
|
words="lang/stopwords_en.txt" |
||||
|
/> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
<filter class="solr.EnglishPossessiveFilterFactory"/> |
||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
||||
|
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: |
||||
|
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
||||
|
--> |
||||
|
<filter class="solr.PorterStemFilterFactory"/> |
||||
|
</analyzer> |
||||
|
<analyzer type="query"> |
||||
|
<tokenizer class="solr.StandardTokenizerFactory"/> |
||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
||||
|
<filter class="solr.StopFilterFactory" |
||||
|
ignoreCase="true" |
||||
|
words="lang/stopwords_en.txt" |
||||
|
/> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
<filter class="solr.EnglishPossessiveFilterFactory"/> |
||||
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
||||
|
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: |
||||
|
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
||||
|
--> |
||||
|
<filter class="solr.PorterStemFilterFactory"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> |
||||
|
<analyzer> |
||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<fieldType name="ngram" class="solr.TextField" > |
||||
|
<analyzer type="index"> |
||||
|
<tokenizer class="solr.KeywordTokenizerFactory"/> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
<filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="15" /> |
||||
|
</analyzer> |
||||
|
<analyzer type="query"> |
||||
|
<tokenizer class="solr.KeywordTokenizerFactory"/> |
||||
|
<filter class="solr.LowerCaseFilterFactory"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
|
||||
|
<fieldType name="edge_ngram" class="solr.TextField" positionIncrementGap="1"> |
||||
|
<analyzer type="index"> |
||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory" /> |
||||
|
<filter class="solr.LowerCaseFilterFactory" /> |
||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> |
||||
|
<filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="15" /> |
||||
|
</analyzer> |
||||
|
<analyzer type="query"> |
||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory" /> |
||||
|
<filter class="solr.LowerCaseFilterFactory" /> |
||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> |
||||
|
</analyzer> |
||||
|
</fieldType> |
||||
|
</types> |
||||
|
|
||||
|
<fields> |
||||
|
<!-- general --> |
||||
|
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/> |
||||
|
<field name="django_ct" type="string" indexed="true" stored="true" multiValued="false"/> |
||||
|
<field name="django_id" type="string" indexed="true" stored="true" multiValued="false"/> |
||||
|
<field name="_version_" type="plong" indexed="true" stored ="true"/> |
||||
|
<field name="text" type="text_pt" indexed="true" stored="true" multiValued="false" /> |
||||
|
|
||||
|
</fields> |
||||
|
|
||||
|
<!-- field to use to determine and enforce document uniqueness. --> |
||||
|
<uniqueKey>id</uniqueKey> |
||||
|
|
||||
|
<!-- field for the QueryParser to use when an explicit fieldname is absent --> |
||||
|
<df>text</df> |
||||
|
|
||||
|
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" --> |
||||
|
<solrQueryParser q.op="AND"/> |
||||
|
</schema> |
File diff suppressed because it is too large
@ -0,0 +1,14 @@ |
|||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more |
||||
|
# contributor license agreements. See the NOTICE file distributed with |
||||
|
# this work for additional information regarding copyright ownership. |
||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
|
# (the "License"); you may not use this file except in compliance with |
||||
|
# the License. You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
@ -0,0 +1,29 @@ |
|||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
|
# (the "License"); you may not use this file except in compliance with |
||||
|
# the License. You may obtain a copy of the License at |
||||
|
# |
||||
|
# http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
# |
||||
|
# Unless required by applicable law or agreed to in writing, software |
||||
|
# distributed under the License is distributed on an "AS IS" BASIS, |
||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
|
# See the License for the specific language governing permissions and |
||||
|
# limitations under the License. |
||||
|
|
||||
|
#----------------------------------------------------------------------- |
||||
|
#some test synonym mappings unlikely to appear in real input text |
||||
|
aaafoo => aaabar |
||||
|
bbbfoo => bbbfoo bbbbar |
||||
|
cccfoo => cccbar cccbaz |
||||
|
fooaaa,baraaa,bazaaa |
||||
|
|
||||
|
# Some synonym groups specific to this example |
||||
|
GB,gib,gigabyte,gigabytes |
||||
|
MB,mib,megabyte,megabytes |
||||
|
Television, Televisions, TV, TVs |
||||
|
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming |
||||
|
#after us won't split it into two words. |
||||
|
|
||||
|
# Synonym mappings can be used for spelling correction too |
||||
|
pixima => pixma |
||||
|
|
@ -0,0 +1,155 @@ |
|||||
|
|
||||
|
import requests |
||||
|
import subprocess |
||||
|
import sys |
||||
|
import argparse |
||||
|
|
||||
|
|
||||
|
class SolrClient:
    """Thin HTTP client for the Solr Collections/ConfigSets admin APIs.

    All endpoints are hit with plain ``requests`` calls; URL templates below
    are filled with :py:meth:`str.format`. The base ``url`` is expected in the
    form ``http(s)://<address>[:port]`` (no trailing slash).
    """

    # URL templates for the Solr admin APIs. "{}" slots are filled in order
    # with the base URL and, where applicable, collection/configset names.
    LIST_CONFIGSETS = "{}/solr/admin/configs?action=LIST&omitHeader=true&wt=json"
    UPLOAD_CONFIGSET = "{}/solr/admin/configs?action=UPLOAD&name={}&wt=json"
    LIST_COLLECTIONS = "{}/solr/admin/collections?action=LIST&wt=json"
    STATUS_COLLECTION = "{}/solr/admin/collections?action=CLUSTERSTATUS&collection={}&wt=json"
    STATUS_CORE = "{}/admin/cores?action=STATUS&name={}"
    EXISTS_COLLECTION = "{}/solr/{}/admin/ping?wt=json"
    OPTIMIZE_COLLECTION = "{}/solr/{}/update?optimize=true&wt=json"
    CREATE_COLLECTION = "{}/solr/admin/collections?action=CREATE&name={}&collection.configName={}&numShards={}&replicationFactor={}&maxShardsPerNode={}&wt=json"
    DELETE_COLLECTION = "{}/solr/admin/collections?action=DELETE&name={}&wt=json"
    DELETE_DATA = "{}/solr/{}/update?commitWithin=1000&overwrite=true&wt=json"
    QUERY_DATA = "{}/solr/{}/select?q=*:*"

    # Name under which the SAPL configset is registered in Solr.
    CONFIGSET_NAME = "sapl_configset"

    def __init__(self, url):
        # Base URL of the Solr server, e.g. "http://saplsolr:8983".
        self.url = url

    def get_num_docs(self, collection_name):
        """Return the number of documents indexed in *collection_name*."""
        final_url = self.QUERY_DATA.format(self.url, collection_name)
        res = requests.get(final_url)
        dic = res.json()
        num_docs = dic["response"]["numFound"]
        return num_docs

    def list_collections(self):
        """Return the list of collection names known to the server."""
        req_url = self.LIST_COLLECTIONS.format(self.url)
        res = requests.get(req_url)
        dic = res.json()
        return dic['collections']

    def exists_collection(self, collection_name):
        """Return True if *collection_name* exists on the server."""
        # Membership test is already a bool; no need for a conditional expr.
        return collection_name in self.list_collections()

    def maybe_upload_configset(self, force=False):
        """Upload the SAPL configset unless it is already present.

        With ``force=True`` the zip is uploaded even if a configset with
        the same name already exists on the server.
        """
        req_url = self.LIST_CONFIGSETS.format(self.url)
        res = requests.get(req_url)
        dic = res.json()
        configsets = dic['configSets']
        # UPLOAD configset
        if self.CONFIGSET_NAME not in configsets or force:
            # "with" guarantees the zip file handle is closed even if the
            # upload request raises (the original leaked the handle).
            with open('./solr/sapl_configset/conf/saplconfigset.zip',
                      'rb') as zip_file:
                files = {'file': ('saplconfigset.zip',
                                  zip_file,
                                  'application/octet-stream',
                                  {'Expires': '0'})}

                req_url = self.UPLOAD_CONFIGSET.format(self.url, self.CONFIGSET_NAME)

                resp = requests.post(req_url, files=files)
            print(resp.content)
        else:
            print('O %s já presente no servidor, NÃO enviando.' % self.CONFIGSET_NAME)

    def create_collection(self, collection_name, shards=1, replication_factor=1, max_shards_per_node=1):
        """Create *collection_name* using the SAPL configset.

        Returns True on success, False otherwise (the Solr error is printed).
        """
        self.maybe_upload_configset()
        req_url = self.CREATE_COLLECTION.format(self.url,
                                                collection_name,
                                                self.CONFIGSET_NAME,
                                                shards,
                                                replication_factor,
                                                max_shards_per_node)
        res = requests.post(req_url)
        if res.ok:
            # Fixed typo in the original message ("succesfully").
            print("Collection '%s' created successfully" % collection_name)
        else:
            print("Error creating collection '%s'" % collection_name)
            as_json = res.json()
            print("Error %s: %s" % (res.status_code, as_json['error']['msg']))
            return False
        return True

    def delete_collection(self, collection_name):
        """Delete one collection, or every collection when passed '*'."""
        if collection_name == '*':
            collections = self.list_collections()
        else:
            collections = [collection_name]

        for c in collections:
            req_url = self.DELETE_COLLECTION.format(self.url, c)
            res = requests.post(req_url)
            if not res.ok:
                # Original passed the name as a second print() argument
                # instead of %-formatting it into the message.
                print("Error deleting collection '%s'" % c)
                print("Code {}: {}".format(res.status_code, res.text))
            else:
                print("Collection '%s' deleted successfully!" % c)

    def delete_index_data(self, collection_name):
        """Delete every document from *collection_name* (keeps the collection)."""
        req_url = self.DELETE_DATA.format(self.url, collection_name)
        res = requests.post(req_url,
                            data='<delete><query>*:*</query></delete>',
                            headers={'Content-Type': 'application/xml'})
        if not res.ok:
            # Same %-formatting fix as in delete_collection.
            print("Error deleting index for collection '%s'" % collection_name)
            print("Code {}: {}".format(res.status_code, res.text))
        else:
            print("Collection '%s' data deleted successfully!" % collection_name)

        num_docs = self.get_num_docs(collection_name)
        print("Num docs: %s" % num_docs)
||||
|
|
||||
|
|
||||
|
if __name__ == '__main__':
    # CLI entry point: ensures a Solr collection exists (creating it from the
    # SAPL configset if needed) and triggers a full Haystack reindex when the
    # collection is empty.

    parser = argparse.ArgumentParser(description='Cria uma collection no Solr')

    # required arguments
    parser.add_argument('-u', type=str, metavar='URL', nargs=1, dest='url',
                        required=True, help='Endereço do servidor Solr na forma http(s)://<address>[:port]')
    parser.add_argument('-c', type=str, metavar='COLLECTION', dest='collection', nargs=1,
                        required=True, help='Collection Solr a ser criada')

    # optional arguments
    parser.add_argument('-s', type=int, dest='shards', nargs='?',
                        help='Number of shards (default=1)', default=1)
    parser.add_argument('-rf', type=int, dest='replication_factor', nargs='?',
                        help='Replication factor (default=1)', default=1)
    parser.add_argument('-ms', type=int, dest='max_shards_per_node', nargs='?',
                        help='Max shards per node (default=1)', default=1)

    try:
        args = parser.parse_args()
    except IOError as msg:
        # NOTE(review): parser.error() raises SystemExit itself, so the
        # sys.exit(-1) below is effectively unreachable; kept for parity.
        parser.error(str(msg))
        sys.exit(-1)

    # nargs=1 stores single-element lists; pop() extracts the value.
    url = args.url.pop()
    collection = args.collection.pop()

    client = SolrClient(url=url)

    if not client.exists_collection(collection):
        print("Collection '%s' doesn't exists. Creating a new one..." % collection)
        created = client.create_collection(collection,
                                           shards=args.shards,
                                           replication_factor=args.replication_factor,
                                           max_shards_per_node=args.max_shards_per_node)
        if not created:
            sys.exit(-1)
    else:
        print("Collection '%s' exists." % collection)

    num_docs = client.get_num_docs(collection)
    if num_docs == 0:
        print("Performing a full reindex of '%s' collection..." % collection)
        # Return code intentionally ignored (original bound it to an unused
        # variable); reindex failures are visible in the subprocess output.
        subprocess.call(["python3", "manage.py", "rebuild_index", "--noinput"])
Loading…
Reference in new issue