Browse Source

elasticsearch 7.x compatibility (#576)

* es_mapping: update turning off dynamic mappings

Elasticsearch changed how dynamic mapping is disabled in 6.x:

https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic.html
https://github.com/elastic/elasticsearch/pull/25734

* es_mapping: remove _all field

deprecated in 6.0 anyway

* es_mapping.yml: fix deprecated mapping type

https://www.elastic.co/guide/en/elasticsearch/reference/6.7/removal-of-types.html#_schedule_for_removal_of_mapping_types

it gives a really unhelpful error otherwise, oof.

* es: fix remaining 7.xisms

`enabled: false` apparently only applies to "object" fields now,
so non-object fields need `index: false` instead

and the _type got removed everywhere. Seems to work now.

* Fix weird offset error with word_delimiter_graph

yet another es7-ism i guess

* Fix warning and some app stuff for ES 7.x

Co-authored-by: Arylide <Arylide@users.noreply.github.com>
master
queue 1 year ago
committed by GitHub
parent
commit
4fcef92b94
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      create_es.sh
  2. 132
      es_mapping.yml
  3. 1
      import_to_es.py
  4. 2
      nyaa/templates/search_results.html
  5. 2
      nyaa/views/main.py
  6. 3
      sync_es.py

1
create_es.sh

@ -1,4 +1,5 @@
#!/usr/bin/env bash
set -e
# create indices named "nyaa" and "sukebei", these are hardcoded
curl -v -XPUT 'localhost:9200/nyaa?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml

132
es_mapping.yml

@ -10,7 +10,6 @@ settings:
char_filter:
- my_char_filter
filter:
- standard
- lowercase
my_index_analyzer:
type: custom
@ -52,7 +51,7 @@ settings:
filter:
my_ngram:
type: edgeNGram
type: edge_ngram
min_gram: 1
max_gram: 15
fullword_min:
@ -66,9 +65,13 @@ settings:
type: pattern_capture
patterns: ["0*([0-9]*)"]
word_delimit:
type: word_delimiter
type: word_delimiter_graph
preserve_original: true
split_on_numerics: false
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-word-delimiter-graph-tokenfilter.html#word-delimiter-graph-tokenfilter-configure-parms
# since we're using "trim" filters downstream, otherwise
# you get weird lucene errors about startOffset
adjust_offsets: false
char_filter:
my_char_filter:
type: mapping
@ -78,70 +81,65 @@ settings:
# plus replicas don't really help either.
number_of_shards: 1
number_of_replicas : 0
mapper:
# disable elasticsearch's "helpful" autoschema
dynamic: false
# since we disabled the _all field, default query the
# name of the torrent.
query:
default_field: display_name
mappings:
torrent:
# don't want everything concatenated
_all:
enabled: false
properties:
id:
type: long
display_name:
# TODO could do a fancier tokenizer here to parse out the
# the scene convention of stuff in brackets, plus stuff like k-on
type: text
analyzer: my_index_analyzer
fielddata: true # Is this required?
fields:
# Multi-field for full-word matching (when going over ngram limits)
# Note: will have to be queried for, not automatic
fullword:
type: text
analyzer: my_fullword_index_analyzer
# Stored for exact phrase matching
exact:
type: text
analyzer: exact_analyzer
created_time:
type: date
# Only in the ES index for generating magnet links
info_hash:
enabled: false
filesize:
type: long
anonymous:
type: boolean
trusted:
type: boolean
remake:
type: boolean
complete:
type: boolean
hidden:
type: boolean
deleted:
type: boolean
has_torrent:
type: boolean
download_count:
type: long
leech_count:
type: long
seed_count:
type: long
comment_count:
type: long
# these ids are really only for filtering, thus keyword
uploader_id:
type: keyword
main_category_id:
type: keyword
sub_category_id:
type: keyword
# disable elasticsearch's "helpful" autoschema
dynamic: false
properties:
id:
type: long
display_name:
# TODO could do a fancier tokenizer here to parse out the
# the scene convention of stuff in brackets, plus stuff like k-on
type: text
analyzer: my_index_analyzer
fielddata: true # Is this required?
fields:
# Multi-field for full-word matching (when going over ngram limits)
# Note: will have to be queried for, not automatic
fullword:
type: text
analyzer: my_fullword_index_analyzer
# Stored for exact phrase matching
exact:
type: text
analyzer: exact_analyzer
created_time:
type: date
#
# Only in the ES index for generating magnet links
info_hash:
type: keyword
index: false
filesize:
type: long
anonymous:
type: boolean
trusted:
type: boolean
remake:
type: boolean
complete:
type: boolean
hidden:
type: boolean
deleted:
type: boolean
has_torrent:
type: boolean
download_count:
type: long
leech_count:
type: long
seed_count:
type: long
comment_count:
type: long
# these ids are really only for filtering, thus keyword
uploader_id:
type: keyword
main_category_id:
type: keyword
sub_category_id:
type: keyword

1
import_to_es.py

@ -34,7 +34,6 @@ def pad_bytes(in_bytes, size):
def mk_es(t, index_name):
return {
"_id": t.id,
"_type": "torrent",
"_index": index_name,
"_source": {
# we're also indexing the id as a number so you can

2
nyaa/templates/search_results.html

@ -17,7 +17,7 @@
{% endif %}
{% endif %}
{% if (use_elastic and torrent_query.hits.total > 0) or (torrent_query.items) %}
{% if (use_elastic and torrent_query.hits.total.value > 0) or (torrent_query.items) %}
<div class="table-responsive">
<table class="table table-bordered table-hover table-striped torrent-list">
<thead>

2
nyaa/views/main.py

@ -167,7 +167,7 @@ def home(rss):
else:
rss_query_string = _generate_query_string(
search_term, category, quality_filter, user_name)
max_results = min(max_search_results, query_results['hits']['total'])
max_results = min(max_search_results, query_results['hits']['total']['value'])
# change p= argument to whatever you change page_parameter to or pagination breaks
pagination = Pagination(p=query_args['page'], per_page=results_per_page,
total=max_results, bs_version=3, page_parameter='p',

3
sync_es.py

@ -114,7 +114,6 @@ def reindex_torrent(t, index_name):
return {
'_op_type': 'update',
'_index': index_name,
'_type': 'torrent',
'_id': str(t['id']),
"doc": doc,
"doc_as_upsert": True
@ -128,7 +127,6 @@ def reindex_stats(s, index_name):
return {
'_op_type': 'update',
'_index': index_name,
'_type': 'torrent',
'_id': str(s['torrent_id']),
"doc": {
"stats_last_updated": s["last_updated"],
@ -141,7 +139,6 @@ def delet_this(row, index_name):
return {
"_op_type": 'delete',
'_index': index_name,
'_type': 'torrent',
'_id': str(row['values']['id'])}
# we could try to make this script robust to errors from es or mysql, but since

Loading…
Cancel
Save