mirror of https://github.com/interlegis/sapl.git
Edward
6 years ago
committed by
GitHub
20 changed files with 2797 additions and 55 deletions
@ -0,0 +1,61 @@ |
|||
version: '2' |
|||
services: |
|||
sapldb: |
|||
image: postgres:10.5-alpine |
|||
restart: always |
|||
environment: |
|||
POSTGRES_PASSWORD: sapl |
|||
POSTGRES_USER: sapl |
|||
POSTGRES_DB: sapl |
|||
PGDATA: /var/lib/postgresql/data/ |
|||
volumes: |
|||
- sapldb_data:/var/lib/postgresql/data/ |
|||
ports: |
|||
- "5432:5432" |
|||
|
|||
saplsolr: |
|||
image: solr:7.4-alpine |
|||
restart: always |
|||
command: bin/solr start -c -f |
|||
volumes: |
|||
- solr_data:/opt/solr/server/solr |
|||
- solr_configsets:/opt/solr/server/solr/configsets |
|||
ports: |
|||
- "8983:8983" |
|||
|
|||
sapl: |
|||
image: interlegis/sapl:3.1.138 |
|||
# build: . |
|||
restart: always |
|||
environment: |
|||
ADMIN_PASSWORD: interlegis |
|||
ADMIN_EMAIL: email@dominio.net |
|||
DEBUG: 'False' |
|||
USE_TLS: 'False' |
|||
EMAIL_PORT: 587 |
|||
EMAIL_HOST: smtp.dominio.net |
|||
EMAIL_HOST_USER: usuariosmtp |
|||
EMAIL_HOST_PASSWORD: senhasmtp |
|||
USE_SOLR: 'True' |
|||
#SOLR_COLLECTION: sapl |
|||
#SOLR_HOST: saplsolr |
|||
SOLR_URL: http://saplsolr:8983/solr/sapl |
|||
TZ: America/Sao_Paulo |
|||
volumes: |
|||
- sapl_data:/var/interlegis/sapl/data |
|||
- sapl_media:/var/interlegis/sapl/media |
|||
- sapl_root:/var/interlegis/sapl |
|||
volumes_from: |
|||
- saplsolr |
|||
depends_on: |
|||
- sapldb |
|||
- saplsolr |
|||
ports: |
|||
- "80:80" |
|||
volumes: |
|||
sapldb_data: |
|||
sapl_data: |
|||
sapl_media: |
|||
sapl_root: |
|||
solr_data: |
|||
solr_configsets: |
@ -0,0 +1,54 @@ |
|||
# Licensed to the Apache Software Foundation (ASF) under one or more |
|||
# contributor license agreements. See the NOTICE file distributed with |
|||
# this work for additional information regarding copyright ownership. |
|||
# The ASF licenses this file to You under the Apache License, Version 2.0 |
|||
# (the "License"); you may not use this file except in compliance with |
|||
# the License. You may obtain a copy of the License at |
|||
# |
|||
# http://www.apache.org/licenses/LICENSE-2.0 |
|||
# |
|||
# Unless required by applicable law or agreed to in writing, software |
|||
# distributed under the License is distributed on an "AS IS" BASIS, |
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|||
# See the License for the specific language governing permissions and |
|||
# limitations under the License. |
|||
|
|||
# a couple of test stopwords to test that the words are really being |
|||
# configured from this file: |
|||
stopworda |
|||
stopwordb |
|||
|
|||
# Standard english stop words taken from Lucene's StopAnalyzer |
|||
a |
|||
an |
|||
and |
|||
are |
|||
as |
|||
at |
|||
be |
|||
but |
|||
by |
|||
for |
|||
if |
|||
in |
|||
into |
|||
is |
|||
it |
|||
no |
|||
not |
|||
of |
|||
on |
|||
or |
|||
such |
|||
that |
|||
the |
|||
their |
|||
then |
|||
there |
|||
these |
|||
they |
|||
this |
|||
to |
|||
was |
|||
will |
|||
with |
@ -0,0 +1,253 @@ |
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt |
|||
| This file is distributed under the BSD License. |
|||
| See http://snowball.tartarus.org/license.php |
|||
| Also see http://www.opensource.org/licenses/bsd-license.html |
|||
| - Encoding was converted to UTF-8. |
|||
| - This notice was added. |
|||
| |
|||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" |
|||
|
|||
| A Portuguese stop word list. Comments begin with vertical bar. Each stop |
|||
| word is at the start of a line. |
|||
|
|||
|
|||
| The following is a ranked list (commonest to rarest) of stopwords |
|||
| deriving from a large sample of text. |
|||
|
|||
| Extra words have been added at the end. |
|||
|
|||
de | of, from |
|||
a | the; to, at; her |
|||
o | the; him |
|||
que | who, that |
|||
e | and |
|||
do | de + o |
|||
da | de + a |
|||
em | in |
|||
um | a |
|||
para | for |
|||
| é from SER |
|||
com | with |
|||
não | not, no |
|||
uma | a |
|||
os | the; them |
|||
no | em + o |
|||
se | himself etc |
|||
na | em + a |
|||
por | for |
|||
mais | more |
|||
as | the; them |
|||
dos | de + os |
|||
como | as, like |
|||
mas | but |
|||
| foi from SER |
|||
ao | a + o |
|||
ele | he |
|||
das | de + as |
|||
| tem from TER |
|||
à | a + a |
|||
seu | his |
|||
sua | her |
|||
ou | or |
|||
| ser from SER |
|||
quando | when |
|||
muito | much |
|||
| há from HAV |
|||
nos | em + os; us |
|||
já | already, now |
|||
| está from EST |
|||
eu | I |
|||
também | also |
|||
só | only, just |
|||
pelo | per + o |
|||
pela | per + a |
|||
até | up to |
|||
isso | that |
|||
ela | she |
|||
entre | between |
|||
| era from SER |
|||
depois | after |
|||
sem | without |
|||
mesmo | same |
|||
aos | a + os |
|||
| ter from TER |
|||
seus | his |
|||
quem | whom |
|||
nas | em + as |
|||
me | me |
|||
esse | that |
|||
eles | they |
|||
| estão from EST |
|||
você | you |
|||
| tinha from TER |
|||
| foram from SER |
|||
essa | that |
|||
num | em + um |
|||
nem | nor |
|||
suas | her |
|||
meu | my |
|||
às | a + as |
|||
minha | my |
|||
| têm from TER |
|||
numa | em + uma |
|||
pelos | per + os |
|||
elas | they |
|||
| havia from HAV |
|||
| seja from SER |
|||
qual | which |
|||
| será from SER |
|||
nós | we |
|||
| tenho from TER |
|||
lhe | to him, her |
|||
deles | of them |
|||
essas | those |
|||
esses | those |
|||
pelas | per + as |
|||
este | this |
|||
| fosse from SER |
|||
dele | of him |
|||
|
|||
| other words. There are many contractions such as naquele = em+aquele, |
|||
| mo = me+o, but they are rare. |
|||
| Indefinite article plural forms are also rare. |
|||
|
|||
tu | thou |
|||
te | thee |
|||
vocês | you (plural) |
|||
vos | you |
|||
lhes | to them |
|||
meus | my |
|||
minhas |
|||
teu | thy |
|||
tua |
|||
teus |
|||
tuas |
|||
nosso | our |
|||
nossa |
|||
nossos |
|||
nossas |
|||
|
|||
dela | of her |
|||
delas | of them |
|||
|
|||
esta | this |
|||
estes | these |
|||
estas | these |
|||
aquele | that |
|||
aquela | that |
|||
aqueles | those |
|||
aquelas | those |
|||
isto | this |
|||
aquilo | that |
|||
|
|||
| forms of estar, to be (not including the infinitive): |
|||
estou |
|||
está |
|||
estamos |
|||
estão |
|||
estive |
|||
esteve |
|||
estivemos |
|||
estiveram |
|||
estava |
|||
estávamos |
|||
estavam |
|||
estivera |
|||
estivéramos |
|||
esteja |
|||
estejamos |
|||
estejam |
|||
estivesse |
|||
estivéssemos |
|||
estivessem |
|||
estiver |
|||
estivermos |
|||
estiverem |
|||
|
|||
| forms of haver, to have (not including the infinitive): |
|||
hei |
|||
há |
|||
havemos |
|||
hão |
|||
houve |
|||
houvemos |
|||
houveram |
|||
houvera |
|||
houvéramos |
|||
haja |
|||
hajamos |
|||
hajam |
|||
houvesse |
|||
houvéssemos |
|||
houvessem |
|||
houver |
|||
houvermos |
|||
houverem |
|||
houverei |
|||
houverá |
|||
houveremos |
|||
houverão |
|||
houveria |
|||
houveríamos |
|||
houveriam |
|||
|
|||
| forms of ser, to be (not including the infinitive): |
|||
sou |
|||
somos |
|||
são |
|||
era |
|||
éramos |
|||
eram |
|||
fui |
|||
foi |
|||
fomos |
|||
foram |
|||
fora |
|||
fôramos |
|||
seja |
|||
sejamos |
|||
sejam |
|||
fosse |
|||
fôssemos |
|||
fossem |
|||
for |
|||
formos |
|||
forem |
|||
serei |
|||
será |
|||
seremos |
|||
serão |
|||
seria |
|||
seríamos |
|||
seriam |
|||
|
|||
| forms of ter, to have (not including the infinitive): |
|||
tenho |
|||
tem |
|||
temos |
|||
têm |
|||
tinha |
|||
tínhamos |
|||
tinham |
|||
tive |
|||
teve |
|||
tivemos |
|||
tiveram |
|||
tivera |
|||
tivéramos |
|||
tenha |
|||
tenhamos |
|||
tenham |
|||
tivesse |
|||
tivéssemos |
|||
tivessem |
|||
tiver |
|||
tivermos |
|||
tiverem |
|||
terei |
|||
terá |
|||
teremos |
|||
terão |
|||
teria |
|||
teríamos |
|||
teriam |
@ -0,0 +1,573 @@ |
|||
<?xml version="1.0" encoding="UTF-8" ?> |
|||
<!-- |
|||
Licensed to the Apache Software Foundation (ASF) under one or more |
|||
contributor license agreements. See the NOTICE file distributed with |
|||
this work for additional information regarding copyright ownership. |
|||
The ASF licenses this file to You under the Apache License, Version 2.0 |
|||
(the "License"); you may not use this file except in compliance with |
|||
the License. You may obtain a copy of the License at |
|||
|
|||
http://www.apache.org/licenses/LICENSE-2.0 |
|||
|
|||
Unless required by applicable law or agreed to in writing, software |
|||
distributed under the License is distributed on an "AS IS" BASIS, |
|||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|||
See the License for the specific language governing permissions and |
|||
limitations under the License. |
|||
--> |
|||
|
|||
<!-- |
|||
|
|||
This example schema is the recommended starting point for users. |
|||
It should be kept correct and concise, usable out-of-the-box. |
|||
|
|||
|
|||
For more information, on how to customize this file, please see |
|||
http://lucene.apache.org/solr/guide/documents-fields-and-schema-design.html |
|||
|
|||
PERFORMANCE NOTE: this schema includes many optional features and should not |
|||
be used for benchmarking. To improve performance one could |
|||
- set stored="false" for all fields possible (esp large fields) when you |
|||
only need to search on the field but don't need to return the original |
|||
value. |
|||
- set indexed="false" if you don't need to search on the field, but only |
|||
return the field as a result of searching on other indexed fields. |
|||
- remove all unneeded copyField statements |
|||
- for best index size and searching performance, set "index" to false |
|||
for all general text fields, use copyField to copy them to the |
|||
catchall "text" field, and use that for searching. |
|||
--> |
|||
|
|||
<schema name="default-config" version="1.6"> |
|||
<!-- attribute "name" is the name of this schema and is only used for display purposes. |
|||
version="x.y" is Solr's version number for the schema syntax and |
|||
semantics. It should not normally be changed by applications. |
|||
|
|||
1.0: multiValued attribute did not exist, all fields are multiValued |
|||
by nature |
|||
1.1: multiValued attribute introduced, false by default |
|||
1.2: omitTermFreqAndPositions attribute introduced, true by default |
|||
except for text fields. |
|||
1.3: removed optional field compress feature |
|||
1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser |
|||
behavior when a single string produces multiple tokens. Defaults |
|||
to off for version >= 1.4 |
|||
1.5: omitNorms defaults to true for primitive field types |
|||
(int, float, boolean, string...) |
|||
1.6: useDocValuesAsStored defaults to true. |
|||
--> |
|||
|
|||
<!-- Valid attributes for fields: |
|||
name: mandatory - the name for the field |
|||
type: mandatory - the name of a field type from the |
|||
fieldTypes section |
|||
indexed: true if this field should be indexed (searchable or sortable) |
|||
stored: true if this field should be retrievable |
|||
docValues: true if this field should have doc values. Doc Values is |
|||
recommended (required, if you are using *Point fields) for faceting, |
|||
grouping, sorting and function queries. Doc Values will make the index |
|||
faster to load, more NRT-friendly and more memory-efficient. |
|||
They are currently only supported by StrField, UUIDField, all |
|||
*PointFields, and depending on the field type, they might require |
|||
the field to be single-valued, be required or have a default value |
|||
(check the documentation of the field type you're interested in for |
|||
more information) |
|||
multiValued: true if this field may contain multiple values per document |
|||
omitNorms: (expert) set to true to omit the norms associated with |
|||
this field (this disables length normalization and index-time |
|||
boosting for the field, and saves some memory). Only full-text |
|||
fields or fields that need an index-time boost need norms. |
|||
Norms are omitted for primitive (non-analyzed) types by default. |
|||
termVectors: [false] set to true to store the term vector for a |
|||
given field. |
|||
When using MoreLikeThis, fields used for similarity should be |
|||
stored for best performance. |
|||
termPositions: Store position information with the term vector. |
|||
This will increase storage costs. |
|||
termOffsets: Store offset information with the term vector. This |
|||
will increase storage costs. |
|||
required: The field is required. It will throw an error if the |
|||
value does not exist |
|||
default: a value that should be used if no value is specified |
|||
when adding a document. |
|||
--> |
|||
|
|||
<!-- field names should consist of alphanumeric or underscore characters only and |
|||
not start with a digit. This is not currently strictly enforced, |
|||
but other field names will not have first class support from all components |
|||
and back compatibility is not guaranteed. Names with both leading and |
|||
trailing underscores (e.g. _version_) are reserved. |
|||
--> |
|||
|
|||
<!-- In this _default configset, only four fields are pre-declared: |
|||
id, _version_, and _text_ and _root_. All other fields will be type guessed and added via the |
|||
"add-unknown-fields-to-the-schema" update request processor chain declared in solrconfig.xml. |
|||
|
|||
Note that many dynamic fields are also defined - you can use them to specify a |
|||
field's type via field naming conventions - see below. |
|||
|
|||
WARNING: The _text_ catch-all field will significantly increase your index size. |
|||
If you don't need it, consider removing it and the corresponding copyField directive. |
|||
--> |
|||
|
|||
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /> |
|||
<!-- docValues are enabled by default for long type so we don't need to index the version field --> |
|||
<field name="_version_" type="plong" indexed="false" stored="false"/> |
|||
<field name="_root_" type="string" indexed="true" stored="false" docValues="false" /> |
|||
<field name="_text_" type="text_general" indexed="true" stored="false" multiValued="true"/> |
|||
|
|||
<!-- Django fields --> |
|||
<field name="django_ct" type="string" indexed="true" stored="true" multiValued="false"/> |
|||
<field name="django_id" type="string" indexed="true" stored="true" multiValued="false"/> |
|||
<field name="text" type="text_pt" indexed="true" stored="true" multiValued="false" /> |
|||
|
|||
<!-- This can be enabled, in case the client does not know what fields may be searched. It isn't enabled by default |
|||
because it's very expensive to index everything twice. --> |
|||
<!-- <copyField source="*" dest="_text_"/> --> |
|||
|
|||
<!-- Dynamic field definitions allow using convention over configuration |
|||
for fields via the specification of patterns to match field names. |
|||
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i) |
|||
RESTRICTION: the glob-like pattern in the name attribute must have a "*" only at the start or the end. --> |
|||
|
|||
<dynamicField name="*_i" type="pint" indexed="true" stored="true"/> |
|||
<dynamicField name="*_is" type="pints" indexed="true" stored="true"/> |
|||
<dynamicField name="*_s" type="string" indexed="true" stored="true" /> |
|||
<dynamicField name="*_ss" type="strings" indexed="true" stored="true"/> |
|||
<dynamicField name="*_l" type="plong" indexed="true" stored="true"/> |
|||
<dynamicField name="*_ls" type="plongs" indexed="true" stored="true"/> |
|||
<dynamicField name="*_t" type="text_general" indexed="true" stored="true" multiValued="false"/> |
|||
<dynamicField name="*_txt" type="text_general" indexed="true" stored="true"/> |
|||
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/> |
|||
<dynamicField name="*_bs" type="booleans" indexed="true" stored="true"/> |
|||
<dynamicField name="*_f" type="pfloat" indexed="true" stored="true"/> |
|||
<dynamicField name="*_fs" type="pfloats" indexed="true" stored="true"/> |
|||
<dynamicField name="*_d" type="pdouble" indexed="true" stored="true"/> |
|||
<dynamicField name="*_ds" type="pdoubles" indexed="true" stored="true"/> |
|||
|
|||
<!-- Type used for data-driven schema, to add a string copy for each text field --> |
|||
<dynamicField name="*_str" type="strings" stored="false" docValues="true" indexed="false" /> |
|||
|
|||
<dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/> |
|||
<dynamicField name="*_dts" type="pdate" indexed="true" stored="true" multiValued="true"/> |
|||
<dynamicField name="*_p" type="location" indexed="true" stored="true"/> |
|||
<dynamicField name="*_srpt" type="location_rpt" indexed="true" stored="true"/> |
|||
|
|||
<!-- payloaded dynamic fields --> |
|||
<dynamicField name="*_dpf" type="delimited_payloads_float" indexed="true" stored="true"/> |
|||
<dynamicField name="*_dpi" type="delimited_payloads_int" indexed="true" stored="true"/> |
|||
<dynamicField name="*_dps" type="delimited_payloads_string" indexed="true" stored="true"/> |
|||
|
|||
<dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/> |
|||
|
|||
<!-- Field to use to determine and enforce document uniqueness. |
|||
Unless this field is marked with required="false", it will be a required field |
|||
--> |
|||
<uniqueKey>id</uniqueKey> |
|||
|
|||
<!-- copyField commands copy one field to another at the time a document |
|||
is added to the index. It's used either to index the same field differently, |
|||
or to add multiple fields to the same field for easier/faster searching. |
|||
|
|||
<copyField source="sourceFieldName" dest="destinationFieldName"/> |
|||
--> |
|||
|
|||
<!-- field type definitions. The "name" attribute is |
|||
just a label to be used by field definitions. The "class" |
|||
attribute and any other attributes determine the real |
|||
behavior of the fieldType. |
|||
Class names starting with "solr" refer to java classes in a |
|||
standard package such as org.apache.solr.analysis |
|||
--> |
|||
|
|||
<!-- sortMissingLast and sortMissingFirst attributes are optional attributes are |
|||
currently supported on types that are sorted internally as strings |
|||
and on numeric types. |
|||
This includes "string", "boolean", "pint", "pfloat", "plong", "pdate", "pdouble". |
|||
- If sortMissingLast="true", then a sort on this field will cause documents |
|||
without the field to come after documents with the field, |
|||
regardless of the requested sort order (asc or desc). |
|||
- If sortMissingFirst="true", then a sort on this field will cause documents |
|||
without the field to come before documents with the field, |
|||
regardless of the requested sort order. |
|||
- If sortMissingLast="false" and sortMissingFirst="false" (the default), |
|||
then default lucene sorting will be used which places docs without the |
|||
field first in an ascending sort and last in a descending sort. |
|||
--> |
|||
|
|||
<!-- The StrField type is not analyzed, but indexed/stored verbatim. --> |
|||
<fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true" /> |
|||
<fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" /> |
|||
|
|||
<!-- boolean type: "true" or "false" --> |
|||
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> |
|||
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/> |
|||
|
|||
<!-- |
|||
Numeric field types that index values using KD-trees. |
|||
Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc. |
|||
--> |
|||
<fieldType name="pint" class="solr.IntPointField" docValues="true"/> |
|||
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/> |
|||
<fieldType name="plong" class="solr.LongPointField" docValues="true"/> |
|||
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/> |
|||
|
|||
<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/> |
|||
<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/> |
|||
<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/> |
|||
<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/> |
|||
|
|||
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and |
|||
is a more restricted form of the canonical representation of dateTime |
|||
http://www.w3.org/TR/xmlschema-2/#dateTime |
|||
The trailing "Z" designates UTC time and is mandatory. |
|||
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z |
|||
All other components are mandatory. |
|||
|
|||
Expressions can also be used to denote calculations that should be |
|||
performed relative to "NOW" to determine the value, ie... |
|||
|
|||
NOW/HOUR |
|||
... Round to the start of the current hour |
|||
NOW-1DAY |
|||
... Exactly 1 day prior to now |
|||
NOW/DAY+6MONTHS+3DAYS |
|||
... 6 months and 3 days in the future from the start of |
|||
the current day |
|||
|
|||
--> |
|||
<!-- KD-tree versions of date fields --> |
|||
<fieldType name="pdate" class="solr.DatePointField" docValues="true"/> |
|||
<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/> |
|||
|
|||
<!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings --> |
|||
<fieldType name="binary" class="solr.BinaryField"/> |
|||
|
|||
<!-- solr.TextField allows the specification of custom text analyzers |
|||
specified as a tokenizer and a list of token filters. Different |
|||
analyzers may be specified for indexing and querying. |
|||
|
|||
The optional positionIncrementGap puts space between multiple fields of |
|||
this type on the same document, with the purpose of preventing false phrase |
|||
matching across fields. |
|||
|
|||
For more info on customizing your analyzer chain, please see |
|||
http://lucene.apache.org/solr/guide/understanding-analyzers-tokenizers-and-filters.html#understanding-analyzers-tokenizers-and-filters |
|||
--> |
|||
|
|||
<!-- One can also specify an existing Analyzer class that has a |
|||
default constructor via the class attribute on the analyzer element. |
|||
Example: |
|||
<fieldType name="text_greek" class="solr.TextField"> |
|||
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/> |
|||
</fieldType> |
|||
--> |
|||
|
|||
<!-- A text field that only splits on whitespace for exact matching of words --> |
|||
<dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/> |
|||
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> |
|||
<analyzer> |
|||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<!-- A general text field that has reasonable, generic |
|||
cross-language defaults: it tokenizes with StandardTokenizer, |
|||
removes stop words from case-insensitive "stopwords.txt" |
|||
(empty by default), and down cases. At query time only, it |
|||
also applies synonyms. |
|||
--> |
|||
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true"> |
|||
<analyzer type="index"> |
|||
<tokenizer class="solr.StandardTokenizerFactory"/> |
|||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
|||
<!-- in this example, we will only use synonyms at query time |
|||
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
|||
<filter class="solr.FlattenGraphFilterFactory"/> |
|||
--> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
</analyzer> |
|||
<analyzer type="query"> |
|||
<tokenizer class="solr.StandardTokenizerFactory"/> |
|||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
|||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
|
|||
<!-- SortableTextField generaly functions exactly like TextField, |
|||
except that it supports, and by default uses, docValues for sorting (or faceting) |
|||
on the first 1024 characters of the original field values (which is configurable). |
|||
|
|||
This makes it a bit more useful then TextField in many situations, but the trade-off |
|||
is that it takes up more space on disk; which is why it's not used in place of TextField |
|||
for every fieldType in this _default schema. |
|||
--> |
|||
<dynamicField name="*_t_sort" type="text_gen_sort" indexed="true" stored="true" multiValued="false"/> |
|||
<dynamicField name="*_txt_sort" type="text_gen_sort" indexed="true" stored="true"/> |
|||
<fieldType name="text_gen_sort" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true"> |
|||
<analyzer type="index"> |
|||
<tokenizer class="solr.StandardTokenizerFactory"/> |
|||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
</analyzer> |
|||
<analyzer type="query"> |
|||
<tokenizer class="solr.StandardTokenizerFactory"/> |
|||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
|||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<!-- A text field with defaults appropriate for English: it tokenizes with StandardTokenizer, |
|||
removes English stop words (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and |
|||
finally applies Porter's stemming. The query time analyzer also applies synonyms from synonyms.txt. --> |
|||
<dynamicField name="*_txt_en" type="text_en" indexed="true" stored="true"/> |
|||
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> |
|||
<analyzer type="index"> |
|||
<tokenizer class="solr.StandardTokenizerFactory"/> |
|||
<!-- in this example, we will only use synonyms at query time |
|||
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
|||
<filter class="solr.FlattenGraphFilterFactory"/> |
|||
--> |
|||
<!-- Case insensitive stop word removal. |
|||
--> |
|||
<filter class="solr.StopFilterFactory" |
|||
ignoreCase="true" |
|||
words="lang/stopwords_en.txt" |
|||
/> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
<filter class="solr.EnglishPossessiveFilterFactory"/> |
|||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|||
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: |
|||
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
|||
--> |
|||
<filter class="solr.PorterStemFilterFactory"/> |
|||
</analyzer> |
|||
<analyzer type="query"> |
|||
<tokenizer class="solr.StandardTokenizerFactory"/> |
|||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
|||
<filter class="solr.StopFilterFactory" |
|||
ignoreCase="true" |
|||
words="lang/stopwords_en.txt" |
|||
/> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
<filter class="solr.EnglishPossessiveFilterFactory"/> |
|||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|||
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: |
|||
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
|||
--> |
|||
<filter class="solr.PorterStemFilterFactory"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<!-- A text field with defaults appropriate for English, plus |
|||
aggressive word-splitting and autophrase features enabled. |
|||
This field is just like text_en, except it adds |
|||
WordDelimiterGraphFilter to enable splitting and matching of |
|||
words on case-change, alpha numeric boundaries, and |
|||
non-alphanumeric chars. This means certain compound word |
|||
cases will work, for example query "wi fi" will match |
|||
document "WiFi" or "wi-fi". |
|||
--> |
|||
<dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/> |
|||
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> |
|||
<analyzer type="index"> |
|||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|||
<!-- in this example, we will only use synonyms at query time |
|||
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
|||
--> |
|||
<!-- Case insensitive stop word removal. |
|||
--> |
|||
<filter class="solr.StopFilterFactory" |
|||
ignoreCase="true" |
|||
words="lang/stopwords_en.txt" |
|||
/> |
|||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|||
<filter class="solr.PorterStemFilterFactory"/> |
|||
<filter class="solr.FlattenGraphFilterFactory" /> |
|||
</analyzer> |
|||
<analyzer type="query"> |
|||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
|||
<filter class="solr.StopFilterFactory" |
|||
ignoreCase="true" |
|||
words="lang/stopwords_en.txt" |
|||
/> |
|||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|||
<filter class="solr.PorterStemFilterFactory"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<!-- Less flexible matching, but less false matches. Probably not ideal for product names, |
|||
but may be good for SKUs. Can insert dashes in the wrong place and still match. --> |
|||
<dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/> |
|||
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> |
|||
<analyzer type="index"> |
|||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> |
|||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> |
|||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|||
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
|||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes |
|||
possible with WordDelimiterGraphFilter in conjuncton with stemming. --> |
|||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> |
|||
<filter class="solr.FlattenGraphFilterFactory" /> |
|||
</analyzer> |
|||
<analyzer type="query"> |
|||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> |
|||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> |
|||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|||
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
|||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes |
|||
possible with WordDelimiterGraphFilter in conjunction with stemming. --> |
|||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<!-- Just like text_general except it reverses the characters of |
|||
each token, to enable more efficient leading wildcard queries. |
|||
--> |
|||
<dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/> |
|||
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100"> |
|||
<analyzer type="index"> |
|||
<tokenizer class="solr.StandardTokenizerFactory"/> |
|||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" |
|||
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/> |
|||
</analyzer> |
|||
<analyzer type="query"> |
|||
<tokenizer class="solr.StandardTokenizerFactory"/> |
|||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
|||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/> |
|||
<fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" > |
|||
<analyzer> |
|||
<tokenizer class="solr.StandardTokenizerFactory"/> |
|||
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<!-- lowercases the entire field value, keeping it as a single token. --> |
|||
<dynamicField name="*_s_lower" type="lowercase" indexed="true" stored="true"/> |
|||
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100"> |
|||
<analyzer> |
|||
<tokenizer class="solr.KeywordTokenizerFactory"/> |
|||
<filter class="solr.LowerCaseFilterFactory" /> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<!-- |
|||
Example of using PathHierarchyTokenizerFactory at index time, so |
|||
queries for paths match documents at that path, or in descendent paths |
|||
--> |
|||
<dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/> |
|||
<fieldType name="descendent_path" class="solr.TextField"> |
|||
<analyzer type="index"> |
|||
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> |
|||
</analyzer> |
|||
<analyzer type="query"> |
|||
<tokenizer class="solr.KeywordTokenizerFactory" /> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<!-- |
|||
Example of using PathHierarchyTokenizerFactory at query time, so |
|||
queries for paths match documents at that path, or in ancestor paths |
|||
--> |
|||
<dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/> |
|||
<fieldType name="ancestor_path" class="solr.TextField"> |
|||
<analyzer type="index"> |
|||
<tokenizer class="solr.KeywordTokenizerFactory" /> |
|||
</analyzer> |
|||
<analyzer type="query"> |
|||
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" /> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<!-- This point type indexes the coordinates as separate fields (subFields) |
|||
If subFieldType is defined, it references a type, and a dynamic field |
|||
definition is created matching *___<typename>. Alternately, if |
|||
subFieldSuffix is defined, that is used to create the subFields. |
|||
Example: if subFieldType="double", then the coordinates would be |
|||
indexed in fields myloc_0___double,myloc_1___double. |
|||
Example: if subFieldSuffix="_d" then the coordinates would be indexed |
|||
in fields myloc_0_d,myloc_1_d |
|||
The subFields are an implementation detail of the fieldType, and end |
|||
users normally should not need to know about them. |
|||
--> |
|||
<dynamicField name="*_point" type="point" indexed="true" stored="true"/> |
|||
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/> |
|||
|
|||
<!-- A specialized field for geospatial search filters and distance sorting. --> |
|||
<fieldType name="location" class="solr.LatLonPointSpatialField" docValues="true"/> |
|||
|
|||
<!-- A geospatial field type that supports multiValued and polygon shapes. |
|||
For more information about this and other spatial fields see: |
|||
http://lucene.apache.org/solr/guide/spatial-search.html |
|||
--> |
|||
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" |
|||
geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" /> |
|||
|
|||
<!-- Payloaded field types --> |
|||
<fieldType name="delimited_payloads_float" stored="false" indexed="true" class="solr.TextField"> |
|||
<analyzer> |
|||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|||
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
<fieldType name="delimited_payloads_int" stored="false" indexed="true" class="solr.TextField"> |
|||
<analyzer> |
|||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|||
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="integer"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
<fieldType name="delimited_payloads_string" stored="false" indexed="true" class="solr.TextField"> |
|||
<analyzer> |
|||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|||
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="identity"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<!-- Portuguese --> |
|||
<dynamicField name="*_txt_pt" type="text_pt" indexed="true" stored="true"/> |
|||
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> |
|||
<analyzer> |
|||
<charFilter class="solr.HTMLStripCharFilterFactory"/> |
|||
<tokenizer class="solr.StandardTokenizerFactory"/> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" /> |
|||
<filter class="solr.PortugueseLightStemFilterFactory"/> |
|||
<!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> |
|||
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> |
|||
<!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<!-- Similarity is the scoring routine for each document vs. a query. |
|||
A custom Similarity or SimilarityFactory may be specified here, but |
|||
the default is fine for most applications. |
|||
For more info: http://lucene.apache.org/solr/guide/other-schema-elements.html#OtherSchemaElements-Similarity |
|||
--> |
|||
<!-- |
|||
<similarity class="com.example.solr.CustomSimilarityFactory"> |
|||
<str name="paramkey">param value</str> |
|||
</similarity> |
|||
--> |
|||
|
|||
</schema> |
@ -0,0 +1,20 @@ |
|||
{"params":{ |
|||
"query":{ |
|||
"defType":"edismax", |
|||
"q.alt":"*:*", |
|||
"rows":"10", |
|||
"fl":"*,score", |
|||
"":{"v":0} |
|||
}, |
|||
"facets":{ |
|||
"facet":"on", |
|||
"facet.mincount": "1", |
|||
"":{"v":0} |
|||
}, |
|||
"velocity":{ |
|||
"wt": "velocity", |
|||
"v.template":"browse", |
|||
"v.layout": "layout", |
|||
"":{"v":0} |
|||
} |
|||
}} |
@ -0,0 +1,21 @@ |
|||
# The ASF licenses this file to You under the Apache License, Version 2.0 |
|||
# (the "License"); you may not use this file except in compliance with |
|||
# the License. You may obtain a copy of the License at |
|||
# |
|||
# http://www.apache.org/licenses/LICENSE-2.0 |
|||
# |
|||
# Unless required by applicable law or agreed to in writing, software |
|||
# distributed under the License is distributed on an "AS IS" BASIS, |
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|||
# See the License for the specific language governing permissions and |
|||
# limitations under the License. |
|||
|
|||
#----------------------------------------------------------------------- |
|||
# Use a protected word file to protect against the stemmer reducing two |
|||
# unrelated words to the same base word. |
|||
|
|||
# Some non-words that normally won't be encountered, |
|||
# just to test that they won't be stemmed. |
|||
dontstems |
|||
zwhacky |
|||
|
Binary file not shown.
@ -0,0 +1,165 @@ |
|||
<?xml version="1.0" ?> |
|||
<!-- |
|||
Licensed to the Apache Software Foundation (ASF) under one or more |
|||
contributor license agreements. See the NOTICE file distributed with |
|||
this work for additional information regarding copyright ownership. |
|||
The ASF licenses this file to You under the Apache License, Version 2.0 |
|||
(the "License"); you may not use this file except in compliance with |
|||
the License. You may obtain a copy of the License at |
|||
|
|||
http://www.apache.org/licenses/LICENSE-2.0 |
|||
|
|||
Unless required by applicable law or agreed to in writing, software |
|||
distributed under the License is distributed on an "AS IS" BASIS, |
|||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|||
See the License for the specific language governing permissions and |
|||
limitations under the License. |
|||
--> |
|||
|
|||
<schema name="default" version="1.6"> |
|||
<types> |
|||
<fieldtype name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/> |
|||
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/> |
|||
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/> |
|||
<fieldtype name="binary" class="solr.BinaryField"/> |
|||
|
|||
<!-- Numeric field types that manipulate the value into |
|||
a string value that isn't human-readable in its internal form, |
|||
but with a lexicographic ordering the same as the numeric ordering, |
|||
so that range queries work correctly. --> |
|||
<fieldType name="pint" class="solr.IntPointField" docValues="true" /> |
|||
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true" /> |
|||
<fieldType name="plong" class="solr.LongPointField" docValues="true" /> |
|||
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/> |
|||
|
|||
|
|||
<fieldType name="pdate" class="solr.DatePointField" docValues="true" /> |
|||
<!-- A Trie based date field for faster date range queries and date faceting. --> |
|||
|
|||
<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/> |
|||
<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/> |
|||
<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/> |
|||
<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/> |
|||
<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/> |
|||
|
|||
|
|||
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/> |
|||
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/> |
|||
<fieldtype name="geohash" class="solr.GeoHashField"/> |
|||
|
|||
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> |
|||
<analyzer type="index"> |
|||
<tokenizer class="solr.StandardTokenizerFactory"/> |
|||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
|||
<!-- in this example, we will only use synonyms at query time |
|||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
|||
--> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
</analyzer> |
|||
<analyzer type="query"> |
|||
<tokenizer class="solr.StandardTokenizerFactory"/> |
|||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> |
|||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<!-- Portuguese --> |
|||
<dynamicField name="*_txt_pt" type="text_pt" indexed="true" stored="true"/> |
|||
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100"> |
|||
<analyzer> |
|||
<tokenizer class="solr.StandardTokenizerFactory"/> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" /> |
|||
<filter class="solr.PortugueseLightStemFilterFactory"/> |
|||
<!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> --> |
|||
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> --> |
|||
<!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> --> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
|
|||
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100"> |
|||
<analyzer type="index"> |
|||
<tokenizer class="solr.StandardTokenizerFactory"/> |
|||
<filter class="solr.StopFilterFactory" |
|||
ignoreCase="true" |
|||
words="lang/stopwords_en.txt" |
|||
/> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
<filter class="solr.EnglishPossessiveFilterFactory"/> |
|||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|||
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: |
|||
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
|||
--> |
|||
<filter class="solr.PorterStemFilterFactory"/> |
|||
</analyzer> |
|||
<analyzer type="query"> |
|||
<tokenizer class="solr.StandardTokenizerFactory"/> |
|||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> |
|||
<filter class="solr.StopFilterFactory" |
|||
ignoreCase="true" |
|||
words="lang/stopwords_en.txt" |
|||
/> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
<filter class="solr.EnglishPossessiveFilterFactory"/> |
|||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|||
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory: |
|||
<filter class="solr.EnglishMinimalStemFilterFactory"/> |
|||
--> |
|||
<filter class="solr.PorterStemFilterFactory"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> |
|||
<analyzer> |
|||
<tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<fieldType name="ngram" class="solr.TextField" > |
|||
<analyzer type="index"> |
|||
<tokenizer class="solr.KeywordTokenizerFactory"/> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
<filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="15" /> |
|||
</analyzer> |
|||
<analyzer type="query"> |
|||
<tokenizer class="solr.KeywordTokenizerFactory"/> |
|||
<filter class="solr.LowerCaseFilterFactory"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
|
|||
<fieldType name="edge_ngram" class="solr.TextField" positionIncrementGap="1"> |
|||
<analyzer type="index"> |
|||
<tokenizer class="solr.WhitespaceTokenizerFactory" /> |
|||
<filter class="solr.LowerCaseFilterFactory" /> |
|||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> |
|||
<filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="15" /> |
|||
</analyzer> |
|||
<analyzer type="query"> |
|||
<tokenizer class="solr.WhitespaceTokenizerFactory" /> |
|||
<filter class="solr.LowerCaseFilterFactory" /> |
|||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> |
|||
</analyzer> |
|||
</fieldType> |
|||
</types> |
|||
|
|||
<fields> |
|||
<!-- general --> |
|||
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/> |
|||
<field name="django_ct" type="string" indexed="true" stored="true" multiValued="false"/> |
|||
<field name="django_id" type="string" indexed="true" stored="true" multiValued="false"/> |
|||
<field name="_version_" type="plong" indexed="true" stored ="true"/> |
|||
<field name="text" type="text_pt" indexed="true" stored="true" multiValued="false" /> |
|||
|
|||
</fields> |
|||
|
|||
<!-- field to use to determine and enforce document uniqueness. --> |
|||
<uniqueKey>id</uniqueKey> |
|||
|
|||
<!-- field for the QueryParser to use when an explicit fieldname is absent --> |
|||
<df>text</df> |
|||
|
|||
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" --> |
|||
<solrQueryParser q.op="AND"/> |
|||
</schema> |
File diff suppressed because it is too large
@ -0,0 +1,14 @@ |
|||
# Licensed to the Apache Software Foundation (ASF) under one or more |
|||
# contributor license agreements. See the NOTICE file distributed with |
|||
# this work for additional information regarding copyright ownership. |
|||
# The ASF licenses this file to You under the Apache License, Version 2.0 |
|||
# (the "License"); you may not use this file except in compliance with |
|||
# the License. You may obtain a copy of the License at |
|||
# |
|||
# http://www.apache.org/licenses/LICENSE-2.0 |
|||
# |
|||
# Unless required by applicable law or agreed to in writing, software |
|||
# distributed under the License is distributed on an "AS IS" BASIS, |
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|||
# See the License for the specific language governing permissions and |
|||
# limitations under the License. |
@ -0,0 +1,29 @@ |
|||
# The ASF licenses this file to You under the Apache License, Version 2.0 |
|||
# (the "License"); you may not use this file except in compliance with |
|||
# the License. You may obtain a copy of the License at |
|||
# |
|||
# http://www.apache.org/licenses/LICENSE-2.0 |
|||
# |
|||
# Unless required by applicable law or agreed to in writing, software |
|||
# distributed under the License is distributed on an "AS IS" BASIS, |
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|||
# See the License for the specific language governing permissions and |
|||
# limitations under the License. |
|||
|
|||
#----------------------------------------------------------------------- |
|||
#some test synonym mappings unlikely to appear in real input text |
|||
aaafoo => aaabar |
|||
bbbfoo => bbbfoo bbbbar |
|||
cccfoo => cccbar cccbaz |
|||
fooaaa,baraaa,bazaaa |
|||
|
|||
# Some synonym groups specific to this example |
|||
GB,gib,gigabyte,gigabytes |
|||
MB,mib,megabyte,megabytes |
|||
Television, Televisions, TV, TVs |
|||
#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming |
|||
#after us won't split it into two words. |
|||
|
|||
# Synonym mappings can be used for spelling correction too |
|||
pixima => pixma |
|||
|
@ -0,0 +1,155 @@ |
|||
|
|||
import requests |
|||
import subprocess |
|||
import sys |
|||
import argparse |
|||
|
|||
|
|||
class SolrClient:
    """Minimal client for the Solr collections/configsets HTTP admin API.

    Every endpoint is kept as a URL template filled in with ``str.format``;
    ``self.url`` is the Solr base address, e.g. ``http://localhost:8983``.
    Methods print their outcome to stdout (this class backs a CLI script).
    """

    LIST_CONFIGSETS = "{}/solr/admin/configs?action=LIST&omitHeader=true&wt=json"
    UPLOAD_CONFIGSET = "{}/solr/admin/configs?action=UPLOAD&name={}&wt=json"
    LIST_COLLECTIONS = "{}/solr/admin/collections?action=LIST&wt=json"
    STATUS_COLLECTION = "{}/solr/admin/collections?action=CLUSTERSTATUS&collection={}&wt=json"
    STATUS_CORE = "{}/admin/cores?action=STATUS&name={}"
    EXISTS_COLLECTION = "{}/solr/{}/admin/ping?wt=json"
    OPTIMIZE_COLLECTION = "{}/solr/{}/update?optimize=true&wt=json"
    CREATE_COLLECTION = "{}/solr/admin/collections?action=CREATE&name={}&collection.configName={}&numShards={}&replicationFactor={}&maxShardsPerNode={}&wt=json"
    DELETE_COLLECTION = "{}/solr/admin/collections?action=DELETE&name={}&wt=json"
    DELETE_DATA = "{}/solr/{}/update?commitWithin=1000&overwrite=true&wt=json"
    QUERY_DATA = "{}/solr/{}/select?q=*:*"

    # Name of the configset uploaded by maybe_upload_configset() and
    # referenced by create_collection().
    CONFIGSET_NAME = "sapl_configset"

    def __init__(self, url):
        """Store the Solr base URL (``http(s)://host[:port]``)."""
        self.url = url

    def get_num_docs(self, collection_name):
        """Return the number of documents indexed in *collection_name*."""
        final_url = self.QUERY_DATA.format(self.url, collection_name)
        res = requests.get(final_url)
        dic = res.json()
        num_docs = dic["response"]["numFound"]
        return num_docs

    def list_collections(self):
        """Return the list of collection names known to the server."""
        req_url = self.LIST_COLLECTIONS.format(self.url)
        res = requests.get(req_url)
        dic = res.json()
        return dic['collections']

    def exists_collection(self, collection_name):
        """Return True if *collection_name* already exists on the server."""
        # fix: redundant ``True if ... else False`` collapsed to the expression
        return collection_name in self.list_collections()

    def maybe_upload_configset(self, force=False):
        """Upload the SAPL configset unless already present (or *force* is True)."""
        req_url = self.LIST_CONFIGSETS.format(self.url)
        res = requests.get(req_url)
        dic = res.json()
        configsets = dic['configSets']
        # UPLOAD configset
        if self.CONFIGSET_NAME not in configsets or force:
            # fix: the zip file handle was opened but never closed; a context
            # manager guarantees it is released even if the POST fails
            with open('./solr/sapl_configset/conf/saplconfigset.zip', 'rb') as zipfile:
                files = {'file': ('saplconfigset.zip',
                                  zipfile,
                                  'application/octet-stream',
                                  {'Expires': '0'})}
                req_url = self.UPLOAD_CONFIGSET.format(self.url, self.CONFIGSET_NAME)
                resp = requests.post(req_url, files=files)
            print(resp.content)
        else:
            print('O %s já presente no servidor, NÃO enviando.' % self.CONFIGSET_NAME)

    def create_collection(self, collection_name, shards=1, replication_factor=1, max_shards_per_node=1):
        """Create *collection_name* backed by the SAPL configset.

        Returns True on success, False otherwise (the Solr error is printed).
        """
        self.maybe_upload_configset()
        req_url = self.CREATE_COLLECTION.format(self.url,
                                                collection_name,
                                                self.CONFIGSET_NAME,
                                                shards,
                                                replication_factor,
                                                max_shards_per_node)
        res = requests.post(req_url)
        if res.ok:
            # fix: typo "succesfully" -> "successfully"
            print("Collection '%s' created successfully" % collection_name)
        else:
            print("Error creating collection '%s'" % collection_name)
            as_json = res.json()
            print("Error %s: %s" % (res.status_code, as_json['error']['msg']))
            return False
        return True

    def delete_collection(self, collection_name):
        """Delete *collection_name*; pass ``'*'`` to delete every collection."""
        if collection_name == '*':
            collections = self.list_collections()
        else:
            collections = [collection_name]

        for c in collections:
            req_url = self.DELETE_COLLECTION.format(self.url, c)
            res = requests.post(req_url)
            if not res.ok:
                # fix: the name was passed as a second print() argument instead
                # of being %-interpolated, so the literal "%s" was printed
                print("Error deleting collection '%s'" % c)
                print("Code {}: {}".format(res.status_code, res.text))
            else:
                print("Collection '%s' deleted successfully!" % c)

    def delete_index_data(self, collection_name):
        """Delete every document of *collection_name* (the collection is kept)."""
        req_url = self.DELETE_DATA.format(self.url, collection_name)
        res = requests.post(req_url,
                            data='<delete><query>*:*</query></delete>',
                            headers={'Content-Type': 'application/xml'})
        if not res.ok:
            # fix: %-interpolation instead of a second print() argument
            print("Error deleting index for collection '%s'" % collection_name)
            print("Code {}: {}".format(res.status_code, res.text))
        else:
            print("Collection '%s' data deleted successfully!" % collection_name)

        num_docs = self.get_num_docs(collection_name)
        print("Num docs: %s" % num_docs)
|||
|
|||
|
|||
if __name__ == '__main__':

    # CLI: ensure a Solr collection exists and trigger a full reindex
    # when it holds no documents yet.
    parser = argparse.ArgumentParser(description='Cria uma collection no Solr')

    # required arguments
    parser.add_argument('-u', type=str, metavar='URL', nargs=1, dest='url',
                        required=True,
                        help='Endereço do servidor Solr na forma http(s)://<address>[:port]')
    parser.add_argument('-c', type=str, metavar='COLLECTION', dest='collection',
                        nargs=1, required=True,
                        help='Collection Solr a ser criada')

    # optional arguments
    parser.add_argument('-s', type=int, dest='shards', nargs='?', default=1,
                        help='Number of shards (default=1)')
    parser.add_argument('-rf', type=int, dest='replication_factor', nargs='?',
                        default=1, help='Replication factor (default=1)')
    parser.add_argument('-ms', type=int, dest='max_shards_per_node', nargs='?',
                        default=1, help='Max shards per node (default=1)')

    try:
        args = parser.parse_args()
    except IOError as msg:
        parser.error(str(msg))
        sys.exit(-1)

    # nargs=1 yields single-element lists; unwrap them
    solr_url = args.url.pop()
    target_collection = args.collection.pop()

    client = SolrClient(url=solr_url)

    if client.exists_collection(target_collection):
        print("Collection '%s' exists." % target_collection)
    else:
        print("Collection '%s' doesn't exists. Creating a new one..." % target_collection)
        if not client.create_collection(target_collection,
                                        shards=args.shards,
                                        replication_factor=args.replication_factor,
                                        max_shards_per_node=args.max_shards_per_node):
            sys.exit(-1)

    # an empty collection means it was never indexed: rebuild from scratch
    if client.get_num_docs(target_collection) == 0:
        print("Performing a full reindex of '%s' collection..." % target_collection)
        subprocess.call(["python3", "manage.py", "rebuild_index", "--noinput"])
Loading…
Reference in new issue