# couchbase-fts-to-record-set.R -rw-r--r-- 3.6 KiB View raw
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#' Return all records specified by document ids
#'
#' Builds the statement `SELECT * FROM <bucket> USE KEYS <json array of keys>`
#' and POSTs it, as the raw request body, to
#' `<cb_svc_url_prefix>/query/service` using HTTP basic authentication.
#'
#' @param keys character vector of couchbase document ids
#' @param bucket the bucket where the documents live
#' @param cb_svc_url_prefix,cb_username,cb_password Your couchbase connection info.
#'   These default to the `COUCHBASE_SVC_URL_PREFIX`, `COUCHBASE_USERNAME` and
#'   `COUCHBASE_PASSWORD` environment variables.
#' @return A tibble built from the `results` element of the response (the
#'   column named after the bucket is unwrapped). A zero-row, zero-column
#'   tibble is returned when the response contains no results. An error is
#'   raised when the HTTP request fails.
cb_get_records_from_keys <- function(keys,
                                     bucket,
                                     cb_svc_url_prefix = Sys.getenv("COUCHBASE_SVC_URL_PREFIX"),
                                     cb_username = Sys.getenv("COUCHBASE_USERNAME"),
                                     cb_password = Sys.getenv("COUCHBASE_PASSWORD")) {

  # Serialise the ids as a JSON array so they can be spliced after USE KEYS.
  keys <- jsonlite::toJSON(keys)

  res <- httr::POST(
    url = glue::glue("{cb_svc_url_prefix}/query/service"),
    httr::authenticate(cb_username, cb_password),
    body = glue::glue('SELECT * FROM {bucket} USE KEYS {keys}'),
    encode = "raw"
  )

  # Fail loudly on HTTP errors (bad credentials, malformed statement, ...)
  # instead of trying to parse an error payload as a result set.
  httr::stop_for_status(res)

  out <- httr::content(res, as = "text", encoding = "UTF-8")
  out <- jsonlite::fromJSON(out)

  results <- out[["results"]]

  # No matching documents: there is no bucket-named column to unwrap.
  if (length(results) == 0) {
    return(tibble::tibble())
  }

  # `SELECT *` wraps each document under the bucket name; unwrap that column.
  tibble::as_tibble(results[[unique(names(results))]])

}

#' Run one full text search request (internal helper)
#'
#' POSTs a JSON search request to
#' `<cb_fts_url_prefix>/api/index/<fts_index>/query` using HTTP basic
#' authentication and returns the parsed JSON response.
#'
#' @param query the free text query string
#' @param fts_index name of the full text index to query
#' @param from zero-based offset of the first hit to return
#' @param cb_fts_url_prefix,cb_username,cb_password Your couchbase connection info.
#'   These default to the `COUCHBASE_FTS_URL_PREFIX`, `COUCHBASE_USERNAME` and
#'   `COUCHBASE_PASSWORD` environment variables.
#' @param size maximum number of hits to return for this request
#' @return The response body parsed with `jsonlite::fromJSON()`. An error is
#'   raised when the HTTP request fails.
#' @noRd
.cb_fts_q <- function(query,
                      fts_index,
                      from = 0,
                      cb_fts_url_prefix = Sys.getenv("COUCHBASE_FTS_URL_PREFIX"),
                      cb_username = Sys.getenv("COUCHBASE_USERNAME"),
                      cb_password = Sys.getenv("COUCHBASE_PASSWORD"),
                      size = 10) {

  res <- httr::POST(
    url = glue::glue("{cb_fts_url_prefix}/api/index/{fts_index}/query"),
    httr::authenticate(cb_username, cb_password),
    body = list(
      query = list(
        query = query
      ),
      # Pagination controls are properties of the search request itself, not
      # of the nested query object, so they must sit at the top level.
      from = from,
      size = size
    ),
    encode = "json"
  )

  httr::stop_for_status(res)

  out <- httr::content(res, as = "text", encoding = "UTF-8")
  jsonlite::fromJSON(out)

}

#' Perform a full text search and receive matching documents
#'
#' Runs the query against the full text index, pages through every hit in
#' steps of 10, collects the hit ids and then fetches the corresponding
#' documents from the bucket.
#'
#' @param query your [free text query](https://docs.couchbase.com/server/6.0/fts/full-text-intro.html)
#' @param index the full text [query index](https://docs.couchbase.com/server/6.0/fts/fts-creating-indexes.html)
#' @param bucket the bucket where the documents live
#' @param cb_fts_url_prefix,cb_svc_url_prefix,cb_username,cb_password Your
#'   couchbase connection info. These default to the
#'   `COUCHBASE_FTS_URL_PREFIX`, `COUCHBASE_SVC_URL_PREFIX`,
#'   `COUCHBASE_USERNAME` and `COUCHBASE_PASSWORD` environment variables.
#' @return Whatever `cb_get_records_from_keys()` returns for the matched ids,
#'   or `NULL` when the search reports no hits.
cb_fts <- function(query,
                   index,
                   bucket,
                   cb_fts_url_prefix = Sys.getenv("COUCHBASE_FTS_URL_PREFIX"),
                   cb_svc_url_prefix = Sys.getenv("COUCHBASE_SVC_URL_PREFIX"),
                   cb_username = Sys.getenv("COUCHBASE_USERNAME"),
                   cb_password = Sys.getenv("COUCHBASE_PASSWORD")) {

  # Number of hits each search response is expected to carry.
  page_size <- 10

  # Fetch the page of hits starting at `offset`.
  fetch_page <- function(offset) {
    .cb_fts_q(
      query = query,
      fts_index = index,
      from = offset,
      cb_fts_url_prefix = cb_fts_url_prefix,
      cb_username = cb_username,
      cb_password = cb_password
    )
  }

  # get initial result set
  first_page <- fetch_page(0)

  total_hits <- first_page[["total_hits"]]
  if (is.null(total_hits) || total_hits == 0) return(NULL) # bad/0-result query

  pages <- list(first_page)

  if (total_hits > page_size) { # if we need to paginate
    # Offsets of the remaining pages. The last valid offset is total_hits - 1,
    # so an exact multiple of page_size does not trigger an empty extra request.
    offsets <- seq(page_size, total_hits - 1, by = page_size)
    pages <- c(pages, lapply(offsets, fetch_page))
  }

  # get the record IDs from every page (pages without hits contribute nothing)
  keys <- as.character(
    unlist(lapply(pages, function(page) page[["hits"]][["id"]]), use.names = FALSE)
  )

  if (length(keys) == 0) return(NULL) # nothing to look up

  cb_get_records_from_keys( # retrieve the records that match
    keys = keys,
    bucket = bucket,
    cb_svc_url_prefix = cb_svc_url_prefix,
    cb_username = cb_username,
    cb_password = cb_password
  )

}