aboutsummaryrefslogtreecommitdiff
path: root/migrations/versions/7ff57806ffd5_.py
blob: f1e48690dcdb390813410f76c2d8766d4651bd7e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
"""empty message

Revision ID: 7ff57806ffd5
Revises: 2f3c3597c78d
Create Date: 2019-01-29 02:57:50.279918

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = '7ff57806ffd5'
down_revision = '2f3c3597c78d'
branch_labels = None
depends_on = None


def upgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    op.execute("""
    DROP TYPE IF EXISTS tsq_state CASCADE;

CREATE TYPE tsq_state AS (
    search_query text,
    parentheses_stack int,
    skip_for int,
    current_token text,
    current_index int,
    current_char text,
    previous_char text,
    tokens text[]
);

CREATE OR REPLACE FUNCTION tsq_append_current_token(state tsq_state)
RETURNS tsq_state AS $$
BEGIN
    IF state.current_token != '' THEN
        state.tokens := array_append(state.tokens, state.current_token);
        state.current_token := '';
    END IF;
    RETURN state;
END;
$$ LANGUAGE plpgsql IMMUTABLE;


CREATE OR REPLACE FUNCTION tsq_tokenize_character(state tsq_state)
RETURNS tsq_state AS $$
BEGIN
    IF state.current_char = '(' THEN
        state.tokens := array_append(state.tokens, '(');
        state.parentheses_stack := state.parentheses_stack + 1;
        state := tsq_append_current_token(state);
    ELSIF state.current_char = ')' THEN
        IF (state.parentheses_stack > 0 AND state.current_token != '') THEN
            state := tsq_append_current_token(state);
            state.tokens := array_append(state.tokens, ')');
            state.parentheses_stack := state.parentheses_stack - 1;
        END IF;
    ELSIF state.current_char = '"' THEN
        state.skip_for := position('"' IN substring(
            state.search_query FROM state.current_index + 1
        ));

        IF state.skip_for > 1 THEN
            state.tokens = array_append(
                state.tokens,
                substring(
                    state.search_query
                    FROM state.current_index FOR state.skip_for + 1
                )
            );
        ELSIF state.skip_for = 0 THEN
            state.current_token := state.current_token || state.current_char;
        END IF;
    ELSIF (
        state.current_char = '-' AND
        (state.current_index = 1 OR state.previous_char = ' ')
    ) THEN
        state.tokens := array_append(state.tokens, '-');
    ELSIF state.current_char = ' ' THEN
        state := tsq_append_current_token(state);
        IF substring(
            state.search_query FROM state.current_index FOR 4
        ) = ' or ' THEN
            state.skip_for := 2;

            -- remove duplicate OR tokens
            IF state.tokens[array_length(state.tokens, 1)] != ' | ' THEN
                state.tokens := array_append(state.tokens, ' | ');
            END IF;
        END IF;
    ELSE
        state.current_token = state.current_token || state.current_char;
    END IF;
    RETURN state;
END;
$$ LANGUAGE plpgsql IMMUTABLE;


CREATE OR REPLACE FUNCTION tsq_tokenize(search_query text) RETURNS text[] AS $$
DECLARE
    state tsq_state;
BEGIN
    SELECT
        search_query::text AS search_query,
        0::int AS parentheses_stack,
        0 AS skip_for,
        ''::text AS current_token,
        0 AS current_index,
        ''::text AS current_char,
        ''::text AS previous_char,
        '{}'::text[] AS tokens
    INTO state;

    state.search_query := lower(trim(
        regexp_replace(search_query, '""+', '""', 'g')
    ));

    FOR state.current_index IN (
        SELECT generate_series(1, length(state.search_query))
    ) LOOP
        state.current_char := substring(
            search_query FROM state.current_index FOR 1
        );

        IF state.skip_for > 0 THEN
            state.skip_for := state.skip_for - 1;
            CONTINUE;
        END IF;

        state := tsq_tokenize_character(state);
        state.previous_char := state.current_char;
    END LOOP;
    state := tsq_append_current_token(state);

    state.tokens := array_nremove(state.tokens, '(', -state.parentheses_stack);

    RETURN state.tokens;
END;
$$ LANGUAGE plpgsql IMMUTABLE;


-- Processes an array of text search tokens and returns a tsquery
CREATE OR REPLACE FUNCTION tsq_process_tokens(config regconfig, tokens text[])
RETURNS tsquery AS $$
DECLARE
    result_query text;
    previous_value text;
    value text;
BEGIN
    result_query := '';
    FOREACH value IN ARRAY tokens LOOP
        IF value = '"' THEN
            CONTINUE;
        END IF;

        IF left(value, 1) = '"' AND right(value, 1) = '"' THEN
            value := phraseto_tsquery(config, value);
        ELSIF value NOT IN ('(', ' | ', ')', '-') THEN
            value := quote_literal(value) || ':*';
        END IF;

        IF previous_value = '-' THEN
            IF value = '(' THEN
                value := '!' || value;
            ELSE
                value := '!(' || value || ')';
            END IF;
        END IF;

        SELECT
            CASE
                WHEN result_query = '' THEN value
                WHEN (
                    previous_value IN ('!(', '(', ' | ') OR
                    value IN (')', ' | ')
                ) THEN result_query || value
                ELSE result_query || ' & ' || value
            END
        INTO result_query;
        previous_value := value;
    END LOOP;

    RETURN to_tsquery(config, result_query);
END;
$$ LANGUAGE plpgsql IMMUTABLE;


CREATE OR REPLACE FUNCTION tsq_process_tokens(tokens text[])
RETURNS tsquery AS $$
    SELECT tsq_process_tokens(get_current_ts_config(), tokens);
$$ LANGUAGE SQL IMMUTABLE;


CREATE OR REPLACE FUNCTION tsq_parse(config regconfig, search_query text)
RETURNS tsquery AS $$
    SELECT tsq_process_tokens(config, tsq_tokenize(search_query));
$$ LANGUAGE SQL IMMUTABLE;


CREATE OR REPLACE FUNCTION tsq_parse(config text, search_query text)
RETURNS tsquery AS $$
    SELECT tsq_parse(config::regconfig, search_query);
$$ LANGUAGE SQL IMMUTABLE;


CREATE OR REPLACE FUNCTION tsq_parse(search_query text) RETURNS tsquery AS $$
    SELECT tsq_parse(get_current_ts_config(), search_query);
$$ LANGUAGE SQL IMMUTABLE;


-- remove first N elements equal to the given value from the array (array
-- must be one-dimensional)
--
-- If negative value is given as the third argument the removal of elements
-- starts from the last array element.
CREATE OR REPLACE FUNCTION array_nremove(anyarray, anyelement, int)
RETURNS ANYARRAY AS $$
    WITH replaced_positions AS (
        SELECT UNNEST(
            CASE
            WHEN $2 IS NULL THEN
                '{}'::int[]
            WHEN $3 > 0 THEN
                (array_positions($1, $2))[1:$3]
            WHEN $3 < 0 THEN
                (array_positions($1, $2))[
                    (cardinality(array_positions($1, $2)) + $3 + 1):
                ]
            ELSE
                '{}'::int[]
            END
        ) AS position
    )
    SELECT COALESCE((
        SELECT array_agg(value)
        FROM unnest($1) WITH ORDINALITY AS t(value, index)
        WHERE index NOT IN (SELECT position FROM replaced_positions)
    ), $1[1:0]);
$$ LANGUAGE SQL IMMUTABLE;
""")
    # ### end Alembic commands ###


def downgrade():
    # ### commands auto generated by Alembic - please adjust! ###
    pass
    # ### end Alembic commands ###