Commit 9e3b4ed8 by amira

upload datasets

parent d46446dc
......@@ -5,19 +5,22 @@ https://www.elastic.co/guide/en/kibana/5.5/_configuring_kibana_on_docker.html
# show indices and size
curl 172.16.1.72:9200/_cat/indices?v
# delete specific document by id (_id)
curl -XDELETE http://localhost:9200/activityidx_v2/activity/AXEQN08B5-afsw1eO1rt
#create index with mapping
curl -XPUT "http://localhost:9200/activityidx" -d'{ "mappings": { "activity": { "properties": { "metaData.loc": { "type": "geo_point" } } } } }'
- https://www.elastic.co/blog/changing-mapping-with-zero-downtime
Steps to change mapping type by reindexing and alias on a new index (using the kibana tools):
1 - get the mapping of the original index: GET activityidx/_mapping (delete the starting field 'activityidx')
2 - create a json file with the changed mapping
2 - create a json file with the changed mapping (without the index name at the beginning of the json)
3 - curl -XPUT localhost:9200/activityidx_v1 -H 'Content-Type: application/json' -d @index-mapping (using curl)
4 - reindex from the original index to the new one:
POST _reindex
{
"conflicts": "proceed",
"source": {
"index": "activityidx"
"index": "activityidx_v2"
},
"dest": {
"index": "activityidx_v1"
......@@ -42,11 +45,11 @@ curl -XDELETE http://localhost:9200/.kibana
curl -XDELETE http://localhost:9200/*
# query less than date
curl -XPOST 'http://172.16.1.72:9200/activityidx/activity/_search' -d '{
curl -XPOST 'http://localhost:9200/activityidx_v2/activity/_search' -d '{
"query": {
"range": {
"published": {
"lte": "now-90d"
"gte": "now-90d"
}
}
}
......@@ -214,11 +217,11 @@ es2csv -q 'type:"thing/traffic-lane" AND userId:"chicago/*"' -u 172.16.1.72:9200
es2csv -q 'type:"thing/traffic-lane" AND userId:"chicago/*" AND creationTime:>1519895494000' -u 172.16.1.72:9200 -o activities-traffic-chicago.csv -m 100000 -i activityidx -f creationTime id type metaData.loc metaData.measures.speed metaData.measures.vehicleCount
es2csv -r -q '{ "query":{"bool":{"must":[{"query_string":{"query":"tenant:fremont & type:\"thing/traffic-lanes\"" }}],"filter":{"geo_bounding_box":{"metaData.loc":{"top_left":{"lat" : 37.629134,"lon" : -122.173405 }, "bottom_right" : { "lat" : 37.457418, "lon" : -121.835116 }}}}}}}' -u 172.16.1.72:9200 -o activities-traffic-fremont.csv -m 100000 -i activityidx -f creationTime id type metaData.loc metaData.toLocation metaData.measures.speed
es2csv -r -q '{ "query":{"bool":{"must":[{"query_string":{"query":"tenant:fremont && type:\"thing/traffic-lanes\"" }}],"filter":{"geo_bounding_box":{"metaData.loc":{"top_left":{"lat" : 37.629134,"lon" : -122.173405 }, "bottom_right" : { "lat" : 37.457418, "lon" : -121.835116 }}}}}}}' -u 172.16.1.72:9200 -o activities-traffic-fremont.csv -m 100000 -i activityidx -f creationTime id type metaData.loc metaData.toLocation metaData.measures.speed
es2csv -r -q '{ "query": { "bool": { "must": [ { "query_string":{ "query":"metaData.event:TrafficJam" } } ], "filter" : { "geo_bounding_box" : { "metaData.loc" :{ "top_left" : { "lat" : 37.460391, "lon" : -122.167106 }, "bottom_right" : { "lat" : 37.152796, "lon" : -121.538847 } } }}}}}' -u 167.99.206.187:9200 -o activities-sanjose-traffic-reports.csv -m 5000000 -i activityidx -f creationTime id type metaData
es2csv -r -q '{ "query":{"bool":{"must":[{"query_string":{"query":"tenant:fremont & type:\"report/*\"" }}],"filter":[{"range": {"published": {"gte": 1576328644000,"format": "epoch_millis" }}},{"geo_bounding_box":{"metaData.loc":{"top_left":{"lat" : 37.629134,"lon" : -122.173405 }, "bottom_right" : { "lat" : 37.457418, "lon" : -121.835116 }}}}]}}}' -u 172.16.1.80:9200 -o reports-fremont.csv -m 100 -i activityidx -f creationTime id type metaData
es2csv -r -q '{ "query":{"bool":{"must":[{"query_string":{"query":"tenant:fremont && type:\"report/*\"" }}],"filter":[{"range": {"published": {"gte": 1576328644000,"format": "epoch_millis" }}},{"geo_bounding_box":{"metaData.loc":{"top_left":{"lat" : 37.629134,"lon" : -122.173405 }, "bottom_right" : { "lat" : 37.457418, "lon" : -121.835116 }}}}]}}}' -u 172.16.1.80:9200 -o reports-fremont.csv -m 100 -i activityidx -f creationTime id type metaData
// CURATOR
command line app for managing elasticsearch indices.
......
#!/bin/bash
#######################################
# Upload a file from the current directory to ownCloud over WebDAV (HTTP PUT).
# Globals (optional, read only):
#   OWNCLOUD_CREDENTIALS - "user:password" handed to curl -u
#   OWNCLOUD_DAV_URL     - WebDAV "files" root of the account
#   Both fall back to the values that used to be hard-coded in this function.
# Arguments:
#   $1 - file name, relative to the current directory
#   $2 - remote directory, relative to the WebDAV files root
# Returns:
#   2 when an argument is missing, 1 when the file is not readable,
#   otherwise curl's exit status (non-zero on HTTP >= 400 because of --fail).
#######################################
upload_file_owncloud(){
  local file_name=${1:-}
  local remote_dir=${2:-}
  # NOTE(review): the fallback keeps the credentials in the script for
  # backward compatibility; prefer exporting OWNCLOUD_CREDENTIALS instead.
  local credentials=${OWNCLOUD_CREDENTIALS:-test:giptmgrr}
  local dav_url=${OWNCLOUD_DAV_URL:-https://municipalitybank.com:8085/remote.php/dav/files/test}

  if [[ -z "${file_name}" || -z "${remote_dir}" ]]; then
    echo "usage: upload_file_owncloud <file> <remote-dir>" >&2
    return 2
  fi

  # Fail early instead of letting curl try to send a file that is not there.
  if [[ ! -r "./${file_name}" ]]; then
    echo "upload_file_owncloud: cannot read ./${file_name}" >&2
    return 1
  fi

  # --fail turns HTTP errors (401, 404, 507, ...) into a non-zero exit status
  # so that a rejected upload no longer looks like a success.
  curl --fail -X PUT -u "${credentials}" "${dav_url}/${remote_dir}/${file_name}" --data-binary @"./${file_name}"
}
#######################################
# Export the last 7 days of "report/*" activities of one tenant from
# Elasticsearch to CSV (via es2csv), gzip the file and upload it to ownCloud.
# Globals:
#   UPLOAD_DATASETS_LOG (optional, read) - log file to append to; defaults to
#     /opt/mcx/config/docker/upload-datasets.log
# Arguments:
#   $1 - tenant name; used in the query, the file name and the upload folder
#   $2 - host/environment label; only used in the file name
# Outputs:
#   Appends a line to the log file and prints a progress message to stdout.
# Returns:
#   1 when no CSV was produced or it could not be renamed/compressed,
#   otherwise the status of upload_file_owncloud.
#######################################
create_dataset_reports() {
  local tenant=$1
  local host=$2
  local log_file=${UPLOAD_DATASETS_LOG:-/opt/mcx/config/docker/upload-datasets.log}
  local base_name="reports-${tenant}-${host}"

  local cur_timestamp
  cur_timestamp=$(date '+%s%3N') # epoch time in milliseconds (GNU date)

  # query_string restricted to the tenant's reports, filtered to the last
  # 7 days through Elasticsearch date math ("now-7d").
  local query_json
  printf -v query_json '{ "query":{"bool":{"must":[{"query_string":{"query":"tenant:%s && type:\\"report/*\\"" }}],"filter":[{"range": {"published": {"gte": "now-7d","format": "epoch_millis" }}}]}}}' "${tenant}"

  # Remove leftovers of earlier runs; -f keeps rm quiet when nothing matches.
  rm -f -- "${base_name}"*.*

  # create dataset - last 7 days
  local es_output
  es_output=$(es2csv -r -q "${query_json}" -u localhost:9200 -o "${base_name}.csv" -i activityidx -f creationTime id type metaData)
  echo "${cur_timestamp} - create dataset reports for ${tenant}: ${es_output}" >> "${log_file}"

  # Without a CSV there is nothing to rename, compress or upload.
  if [[ ! -f "${base_name}.csv" ]]; then
    echo "${cur_timestamp} - no dataset file produced for ${tenant}, upload skipped" >> "${log_file}"
    return 1
  fi

  # gzip and upload under a timestamped name
  local today
  today=$(date '+%Y_%m_%d__%H_%M_%S')
  local dated_name="${base_name}-${today}"
  mv -- "${base_name}.csv" "${dated_name}.csv" || return 1
  gzip -- "${dated_name}.csv" || return 1

  echo "Uploading ${dated_name} to cloud"
  upload_file_owncloud "${dated_name}.csv.gz" "Datasets/cities/${tenant}/datasets"
}
# Build and upload the weekly report dataset for every tenant of the
# "integration" environment, one tenant after the other.
for city in "fremont" "chicago"; do
  create_dataset_reports "${city}" "integration"
done
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment