Skip to content

Commit e9360ab

Browse files
committed
#1 crawler updates
Add parameters to select the crawling scope; add the ability to chunk elements.
1 parent 67eaffd commit e9360ab

File tree

3 files changed

+136
-19
lines changed

3 files changed

+136
-19
lines changed

com.incquerylabs.vhci.modelaccess.twc.rest/src/main/kotlin/com/incquerylabs/vhci/modelaccess/twc/rest/Crawler.kt

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,30 +25,56 @@ fun main(args: Array<String>) {
2525
.setArgName("username")
2626
.setIndex(0)
2727
.setDefaultValue("admin")
28-
.setDescription("TWC username."))
28+
.setDescription("TWC username.")
29+
)
2930
.addArgument(Argument()
3031
.setArgName("password")
3132
.setIndex(1)
3233
.setDefaultValue("admin")
33-
.setDescription("TWC password."))
34+
.setDescription("TWC password.")
35+
)
3436
.addOption(Option()
3537
.setLongName("help")
3638
.setShortName("h")
3739
.setDescription("Show help site.")
38-
.setFlag(true))
40+
.setFlag(true)
41+
)
3942
.addOption(Option()
4043
.setLongName("server")
4144
.setShortName("S")
42-
.setDescription("Set server path."))
45+
.setDescription("Set server path.")
46+
)
4347
.addOption(Option()
4448
.setLongName("port")
4549
.setShortName("P")
46-
.setDescription("Set server port number."))
50+
.setDescription("Set server port number.")
51+
)
4752
.addOption(Option()
4853
.setLongName("instanceNum")
4954
.setShortName("I")
5055
.setDescription("Set number of RESTVerticle instances. Default:16")
51-
.setDefaultValue("16"))
56+
.setDefaultValue("16")
57+
)
58+
.addOption(Option()
59+
.setLongName("workspaceId")
60+
.setShortName("W")
61+
.setDescription("Select workspace to crawl")
62+
)
63+
.addOption(Option()
64+
.setLongName("resourceId")
65+
.setShortName("R")
66+
.setDescription("Select resource to crawl")
67+
)
68+
.addOption(Option()
69+
.setLongName("branchId")
70+
.setShortName("B")
71+
.setDescription("Select branch to crawl")
72+
)
73+
.addOption(Option()
74+
.setLongName("revision")
75+
.setShortName("REV")
76+
.setDescription("Select revision to crawl")
77+
)
5278

5379

5480
val commandLine = cli.parse(args.asList(),false)
@@ -67,13 +93,33 @@ fun main(args: Array<String>) {
6793
val serverOpt = commandLine.getOptionValue<String>("server")
6894
val portOpt = commandLine.getOptionValue<String>("port")
6995
val instanceNum = commandLine.getOptionValue<String>("instanceNum").toInt()
96+
val workspaceId = commandLine.getOptionValue<String>("workspaceId")
97+
val resourceId = commandLine.getOptionValue<String>("resourceId")
98+
val branchId = commandLine.getOptionValue<String>("branchId")
99+
val revision = commandLine.getOptionValue<String>("revision")
70100

71101
if(instanceNum!=null){
72-
println(instanceNum)
102+
println("Instance number set to $instanceNum")
73103
if(instanceNum<1){
74104
error("Number of Instances should be at least 1.")
75105
}
76106
}
107+
if(workspaceId != null){
108+
println("Workspace ID set to $workspaceId")
109+
twcMap.put(DataConstants.WORKSPACE_ID, workspaceId)
110+
}
111+
if(resourceId != null){
112+
println("Resource ID set to $resourceId")
113+
twcMap.put(DataConstants.RESOURCE_ID, resourceId)
114+
}
115+
if(branchId != null){
116+
println("Branch ID set to $branchId")
117+
twcMap.put(DataConstants.BRANCH_ID, branchId)
118+
}
119+
if(revision != null){
120+
println("Revision set to $revision")
121+
twcMap.put(DataConstants.REVISION, revision.toInt())
122+
}
77123

78124
if(serverOpt != null && portOpt!=null){
79125
if(!File("server.config").exists()){

com.incquerylabs.vhci.modelaccess.twc.rest/src/main/kotlin/com/incquerylabs/vhci/modelaccess/twc/rest/verticles/MainVerticle.kt

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,39 @@ class MainVerticle(val usr: String, val pswd: String) : AbstractVerticle() {
7171
DataConstants.LOGGED_IN -> {
7272
println("Login complete")
7373

74-
eb.send(DataConstants.TWCVERT_ADDRESS, Json.encode(Message(DataConstants.GET_WORKSPACES, JsonObject())))
74+
val workspaceId = twcMap.get(DataConstants.WORKSPACE_ID)
75+
val resourceId = twcMap.get(DataConstants.RESOURCE_ID)
76+
val branchId = twcMap.get(DataConstants.BRANCH_ID)
77+
val revision = twcMap.get(DataConstants.REVISION)
78+
if(workspaceId == null){
79+
eb.send(DataConstants.TWCVERT_ADDRESS, Json.encode(Message(DataConstants.GET_WORKSPACES, JsonObject())))
80+
} else if (resourceId == null){
81+
eb.send(DataConstants.TWCVERT_ADDRESS, Json.encode(Message(DataConstants.GET_RESOURCES, JsonObject().put(DataConstants.WORKSPACE_ID, workspaceId))))
82+
} else if (branchId == null) {
83+
eb.send(DataConstants.TWCVERT_ADDRESS, Json.encode(Message(DataConstants.GET_BRANCHES,
84+
JsonObject()
85+
.put(DataConstants.WORKSPACE_ID, workspaceId)
86+
.put(DataConstants.RESOURCE_ID, resourceId))))
87+
} else if (revision == null) {
88+
eb.send(DataConstants.TWCVERT_ADDRESS, Json.encode(Message(DataConstants.GET_REVISIONS,
89+
JsonObject()
90+
.put(DataConstants.WORKSPACE_ID, workspaceId)
91+
.put(DataConstants.RESOURCE_ID, resourceId)
92+
.put(DataConstants.BRANCH_ID, branchId)
93+
)))
94+
} else {
95+
vertx.eventBus().send(DataConstants.TWCVERT_ADDRESS, Json.encode(Message(DataConstants.GET_ROOT_ELEMENT_IDS,
96+
JsonObject()
97+
.put(DataConstants.WORKSPACE_ID, workspaceId)
98+
.put(DataConstants.RESOURCE_ID, resourceId)
99+
.put(DataConstants.BRANCH_ID, branchId)
100+
.put(DataConstants.REVISION_ID, revision)
101+
)))
102+
}
103+
104+
105+
106+
75107
//eb.send("twc.rest.twcvert", Json.encode(Message("logout", JsonObject())))
76108
}
77109
DataConstants.REPO -> {
@@ -168,6 +200,7 @@ class MainVerticle(val usr: String, val pswd: String) : AbstractVerticle() {
168200
// println("Received Element")
169201
// println(data)
170202

203+
val revisionId = data.getInteger(DataConstants.REVISION_ID)
171204
val branchId = data.getString(DataConstants.BRANCH_ID)
172205
val resourceId = data.getString(DataConstants.RESOURCE_ID)
173206
val workspaceId = data.getString(DataConstants.WORKSPACE_ID)
@@ -179,6 +212,7 @@ class MainVerticle(val usr: String, val pswd: String) : AbstractVerticle() {
179212
.put(DataConstants.WORKSPACE_ID, workspaceId)
180213
.put(DataConstants.RESOURCE_ID, resourceId)
181214
.put(DataConstants.BRANCH_ID, branchId)
215+
.put(DataConstants.REVISION_ID, revisionId)
182216
.put(DataConstants.ELEMENT_ID, element_id)
183217
)))
184218
}
@@ -205,10 +239,16 @@ class MainVerticle(val usr: String, val pswd: String) : AbstractVerticle() {
205239
}
206240
DataConstants.ERROR -> {
207241
println("\nExit")
242+
if(twcMap.get("cookies") != null) {
243+
eb.send(DataConstants.TWCVERT_ADDRESS, Json.encode(Message(DataConstants.LOGOUT, JsonObject())))
244+
}
208245
vertx.close()
209246
}
210247
DataConstants.EXIT -> {
211248
println("\nExit")
249+
if(twcMap.get("cookies") != null) {
250+
eb.send(DataConstants.TWCVERT_ADDRESS, Json.encode(Message(DataConstants.LOGOUT, JsonObject())))
251+
}
212252
vertx.close()
213253
}
214254

com.incquerylabs.vhci.modelaccess.twc.rest/src/main/kotlin/com/incquerylabs/vhci/modelaccess/twc/rest/verticles/RESTVerticle.kt

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ class RESTVerticle() : AbstractVerticle(){
2525
serverPath = twcMap.get("server_path").toString()
2626
port = twcMap.get("server_port") as Int
2727

28+
val debug = false
29+
2830
vertx.eventBus().consumer<Any>(DataConstants.TWCVERT_ADDRESS,{ message ->
2931
val json = JsonObject(message.body().toString())
3032
val obj = json.getJsonObject("obj")
@@ -40,33 +42,52 @@ class RESTVerticle() : AbstractVerticle(){
4042
logout(client,twcMap)
4143
}
4244
DataConstants.GET_WORKSPACES ->{
43-
// println("Query Workspaces")
45+
if (debug) {
46+
println("Query Workspaces")
47+
println(obj)
48+
}
4449
getWorkspaces(client,twcMap)
4550
}
4651
DataConstants.GET_RESOURCES ->{
47-
// println("Query Resources")
48-
// println(obj)
52+
if (debug) {
53+
println("Query Resources")
54+
println(obj)
55+
}
4956
getResources(client,twcMap,obj)
50-
//(client,twcMap)
5157
}
5258
DataConstants.GET_BRANCHES->{
53-
// println("Query Branches")
59+
if (debug) {
60+
println("Query Branches")
61+
println(obj)
62+
}
5463
getBranches(client,twcMap,obj)
5564
}
5665
DataConstants.GET_REVISIONS ->{
57-
// println("Query Revisions")
66+
if (debug) {
67+
println("Query Revisions")
68+
println(obj)
69+
}
5870
getRevisions(client,twcMap,obj)
5971
}
6072
DataConstants.GET_ROOT_ELEMENT_IDS ->{
61-
// println("Search Root Element Ids")
73+
if (debug) {
74+
println("Search Root Element Ids")
75+
println(obj)
76+
}
6277
getRootElementIds(client,twcMap,obj)
6378
}
6479
DataConstants.GET_ELEMENT ->{
65-
//println("Query Element")
80+
if (debug) {
81+
println("Query Element")
82+
println(obj)
83+
}
6684
getElement(client,twcMap,obj)
6785
}
6886
DataConstants.GET_ELEMENTS ->{
69-
//println("Query Element")
87+
if (debug) {
88+
println("Query Elements")
89+
println(obj)
90+
}
7091
getElements(client,twcMap,obj)
7192
}
7293
else -> error("Unknown Command: ${json.getString("event")}")
@@ -110,8 +131,18 @@ class RESTVerticle() : AbstractVerticle(){
110131
element.getJsonObject(0).getJsonArray("ldp:contains")
111132
}
112133
if(!containedElements.isEmpty()) {
113-
val elementM = Elements(revisionId,branchId,resourceId,workspaceId,JsonArray(containedElements))
114-
vertx.eventBus().send(DataConstants.TWCMAIN_ADDRESS, Json.encode(Message(DataConstants.ELEMENTS, elementM)))
134+
val chunkSize = 1000
135+
if (chunkSize > 1) {
136+
containedElements.withIndex().groupBy {
137+
it.index / chunkSize
138+
}.values.map { it.map { it.value } }.forEach {chunkList ->
139+
val elementM = Elements(revisionId,branchId,resourceId,workspaceId,JsonArray(chunkList))
140+
vertx.eventBus().send(DataConstants.TWCMAIN_ADDRESS, Json.encode(Message(DataConstants.ELEMENTS, elementM)))
141+
}
142+
} else {
143+
val elementM = Elements(revisionId,branchId,resourceId,workspaceId,JsonArray(containedElements))
144+
vertx.eventBus().send(DataConstants.TWCMAIN_ADDRESS, Json.encode(Message(DataConstants.ELEMENTS, elementM)))
145+
}
115146
}
116147

117148
} else {

0 commit comments

Comments
 (0)