ES update nested array (using Java API)

Recently, I was working on a requirement. At first I thought it could be solved with an ES script, and that approach cost me a day and a half.

Later, I switched to the API of the Java client, which was much more efficient.

package com.XXX.XXXX.XXX;

import com.alibaba.fastjson.JSON;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.assertj.core.util.Arrays;
import org.elasticsearch.action.DocWriteResponse;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;

import javax.annotation.Resource;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

@Slf4j
@RunWith(SpringRunner.class)
@SpringBootTest
public class HestiaApplicationTests {

    @Resource
    protected RestHighLevelClient client;

    @Test
    public void contextLoads() {
    }

    /**
     * *Retrieve and then update the document
     **/
    @Test
    public void search() throws IOException {

		try {
			SearchRequest searchRequest = new SearchRequest("zm_prod");
			SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
			// What page
			searchSourceBuilder.from(0);
			Map<String, Object> params = new HashMap<>();
			//Course label: 1 Large class; 2. Small class; 3.AI class; 4. Grinding class
//        Script script = new Script(ScriptType.INLINE, "painless", "ctx._source.values.base_info.course_mark == 2", params);
//        searchSourceBuilder.query(QueryBuilders.scriptQuery(script));
//        searchSourceBuilder.query(QueryBuilders.matchQuery("_source.values.base_info.course_mark",2));


			searchSourceBuilder.query(QueryBuilders.termQuery("_id", 55074));
//			searchSourceBuilder.query(QueryBuilders.matchAllQuery());
			// How many pieces of data per page
			searchSourceBuilder.size(1000);
			// Set the timeout to 2s
			searchSourceBuilder.timeout(new TimeValue(2000));

			// Set the index and type of request to search
//		searchRequest.indices("zm_prod").types("news");
			// Set SearchSourceBuilder query properties
			searchRequest.source(searchSourceBuilder);

			SearchResponse searchResponse = client.search(searchRequest);
			SearchHit[] searchHits = searchResponse.getHits().getHits();
			if (ArrayUtils.isEmpty(searchHits)) {
				log.info("searchHits = 666666666666666666666");
			}

			for (SearchHit hit : searchHits) {
				//Primary key of the document
				String id = hit.getId();
				String index = hit.getIndex();
				String type = hit.getType();
				String sourceAsString = hit.getSourceAsString();
				if(!sourceAsString.contains("course_mark")){
					continue;
				}
				//Source document content
				Map<String, Object> sourceAsMap = hit.getSource();
//				String s = JSON.toJSONString(sourceAsMap);
				Map<String, Object> values = (Map)sourceAsMap.get("values");
				Map<String, Object>  base_info = (Map)values.get("base_info");

				Integer course_mark = (Integer) base_info.get("course_mark");
				String base_infostr = JSON.toJSONString(base_info);
				if(null!=course_mark){
					if(course_mark==1||course_mark==2||course_mark==3){
						log.info(" base_infostr = " + base_infostr);

                         //TODO reversely adds fields to the map, reassembles, and updates the current data
						base_info.put("BBB",7777);
						values.put("base_info",base_info);

						UpdateRequest uRequest = new UpdateRequest()
								.index(index)
								.type(type)
								.id(id)
								.doc(XContentFactory.jsonBuilder()
										.startObject().field("values", values).endObject());
						BulkRequest blkRequest = new BulkRequest();
						blkRequest.add(uRequest);
                       // implement
						BulkResponse bulkResponse = client.bulk(blkRequest);
//						for (UpdateRequest uprequest : list) {
//							bulkResponse.add(uprequest);
//						}

//						BulkResponse bulkResponse = bulkResponse.execute().actionGet();

						if (bulkResponse.hasFailures()) {
							System.out.println("Batch error!");
						}

					}
				}
			}
		} catch (IOException e) {
			e.printStackTrace();
			log.error(ExceptionUtils.getMessage(e));
		}
    }

}

The following is quoted from other bloggers' posts for reference:

Recently, I was studying ElasticSearch, and a few days ago I ran into a difficult problem at work. As the title says, I was using the Java TransportClient to update an array with a complex data structure in ES. In the end, I asked my boss for help to solve it. This article describes the problem in detail and provides the solution.

The main updated data formats are as follows:

Raw data: an array of nested-type objects. Updated data: the same array with the entry for "Mall 01" removed.

"list":[
    {
    "code": "9111364",
    "name": "Enterprise 01"
    },
    {
     "code": "900662",
    "name": "Smart 01"
   },
   {
   "code": "9000300",
   "name": "Mall 01"
    }
]

The blogger is an ES novice and did not know how to use UpdateRequest correctly to update this type of data.

Therefore, the blogger used the following method, where value is the updated data (that is, the list without the "Mall 01" entry). For the type of value, the blogger tried Object and List<>, and even converting the list to JSON format did not work.

// XXXXXXXXX indicates the data to be updated
// List<XXX> value = XXXXXXXXX;
// XXXXXXXXX indicates the data to be updated
// Object value = XXXXXXXXX;
List<Map<String, Object>> value = XXXXXXXXX; // This works well
updateRequest.doc(XContentFactory.jsonBuilder()
        .startObject()
        .field("name", value)
        .field(flag, 1)
        .endObject());

Finally, the boss told me to convert `List<XXX> value` to `List<Map<String, Object>> value`. That is, when using updateRequest, ES cannot be updated normally when the field value is passed as an array of arbitrary objects, but ES does recognize a List whose generic element type is Map.

// Convert the nested array objects to Set format (List is also OK), otherwise the data cannot be updated (an error will be reported)
List<Map<String, Object>> set = Lists.newArrayList();
Map<String, Object> map = Maps.newHashMap();
Class<?> clazz;
// Use reflection to dynamically put the attribute values of each object into the Map
for (Object obj : setArry) {
    clazz = obj.getClass();
    // Traverse the property values of the current object
    for (Field field : clazz.getDeclaredFields()) {
        field.setAccessible(true);
        String name = field.getName();
        Object value = field.get(obj);
        map.put(name, value);
    }
    set.add(map);
    map = Maps.newHashMap();
}

Because there are many array types, the blogger used reflection, which is compatible with each array type. Finally, the data was successfully updated.

Another reference:

If you update a document and know the document id, you can use UpdateRequest. The code is as follows:

/**
 * Updates one document whose id is already known by sending a partial JSON document.
 *
 * @throws IOException propagated from the client's update call
 */
@Test
public void test() throws IOException {
    // Partial document, passed as a raw JSON string
    String bankJson = "{\"aliasName\":\"Agricultural Development Bank of China Linzhou sub branch 444\",\"bankType\":\"ADB\",\"bankTypeName\":\"Agricultural Development Bank of China\",\"cityId\":\"410500\",\"cityName\":\"Anyang City\",\"createTime\":1515719190000,\"createUser\":\"system\",\"id\":\"000238a326b044e9ae10cfe4298f4c44\",\"isEnabled\":\"1\",\"name\":\"Agricultural Development Bank of China Linzhou sub branch\",\"provinceId\":\"410000\",\"provinceName\":\"Henan Province\",\"unionNumber\":\"203496100010\"}";

    // Arguments are index, type and document id
    UpdateRequest updateById = new UpdateRequest("sub_bank1031", "sub_bank", "SvjgP24BndtcmnpzbiuL")
            .doc(bankJson, XContentType.JSON);

    UpdateResponse updateResult = highLevelClient.update(updateById, RequestOptions.DEFAULT);
    println(updateResult.getResult());
}

However, if you do not know the document id and still want to use UpdateRequest to update the document, you first need to use SearchRequest to query the matching documents by some condition, and then loop over the hits and update each document.

    /**
     * Looks up documents with a search request, then updates every hit by its id,
     * issuing one update call per hit.
     *
     * @throws IOException propagated from the client's search or update calls
     */
    @Test
    public void search() throws IOException {
        SearchSourceBuilder idLookup = new SearchSourceBuilder()
                .query(QueryBuilders.matchQuery("_id", "SvjgP24BndtcmnpzbiuL"))
                .size(2);
        SearchRequest lookupRequest = new SearchRequest("sub_bank1031").source(idLookup);
        SearchResponse lookupResponse = highLevelClient.search(lookupRequest, RequestOptions.DEFAULT);

        // The same partial document is applied to every hit
        String bankJson = "{\"aliasName\":\"Agricultural Development Bank of China Linzhou sub branch 444\",\"bankType\":\"ADB\",\"bankTypeName\":\"Agricultural Development Bank of China\",\"cityId\":\"410500\",\"cityName\":\"Anyang City\",\"createTime\":1515719190000,\"createUser\":\"system\",\"id\":\"000238a326b044e9ae10cfe4298f4c44\",\"isEnabled\":\"1\",\"name\":\"Agricultural Development Bank of China Linzhou sub branch\",\"provinceId\":\"410000\",\"provinceName\":\"Henan Province\",\"unionNumber\":\"203496100010\"}";

        for (SearchHit matched : lookupResponse.getHits().getHits()) {
            UpdateRequest updateOne = new UpdateRequest("sub_bank1031", "sub_bank", matched.getId())
                    .doc(bankJson, XContentType.JSON);
            UpdateResponse updateResult = highLevelClient.update(updateOne, RequestOptions.DEFAULT);
            println(updateResult.getResult());
        }
    }

The above approach is a little cumbersome: it needs multiple HTTP requests to complete. When the number of documents to be updated is large, the system's response time suffers greatly. In that case, we can use ES's UpdateByQueryRequest to do the same thing in a single request.

/**
 * Updates the documents matched by a term query in one request, applying an inline
 * painless script to them.
 *
 * @throws IOException propagated from the client's update-by-query call
 */
@Test
public void updateByQueryRequest() throws IOException {
    // The script only changes aliasName on documents whose bankType is 'BOC'
    String aliasScript = "if (ctx._source.bankType == 'BOC') {ctx._source.aliasName='hello'}";
    Script painless = new Script(ScriptType.INLINE, "painless", aliasScript, Collections.emptyMap());

    UpdateByQueryRequest byQuery = new UpdateByQueryRequest("sub_bank1031");
    byQuery.setDocTypes("sub_bank");
    byQuery.setQuery(new TermQueryBuilder("cityId", "511000"));
    byQuery.setSize(2);
    byQuery.setScript(painless);

    BulkByScrollResponse resp = highLevelClient.updateByQuery(byQuery, RequestOptions.DEFAULT);
}

PS:

The ES-related dependencies in the pom file are as follows:

  <!-- Elasticsearch core, declared explicitly -->
  <dependency>
    <groupId>org.elasticsearch</groupId>
    <artifactId>elasticsearch</artifactId>
    <version>6.8.0</version>
  </dependency>
  <!-- High-level REST client; the elasticsearch core artifact is excluded from it here -->
  <dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>6.8.0</version>
    <exclusions>
      <exclusion>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch</artifactId>
      </exclusion>
    </exclusions>
  </dependency>

Added by Gubbins on Thu, 06 Jan 2022 09:51:12 +0200

Programming VIP

ES update nested array (using Java API)

Popular Keywords