/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.net.nntp;

import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

/**
 * This is an implementation of a message threading algorithm, as originally devised by Jamie Zawinski.
 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
 * For his Java implementation, see
 * <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
 * <p>
 * The working state of a threading run is kept in instance fields, so a single
 * instance must not be used by several threads at the same time.
 *
 * @author rwinston &lt;rwinston@checkfree.com&gt;
 */
public class Threader {
    /** Synthetic root whose child list is the root set; only set while {@code thread} is running. */
    private ThreadContainer root;
    /** Maps message id to its container; only set while containers are being built. */
    private HashMap<String,ThreadContainer> idTable;
    /** Counter used to generate unique placeholder ids for messages with duplicate ids. */
    private int bogusIdCount = 0;

    /**
     * The client passes in a list of Threadable objects, and
     * the Threader constructs a connected 'graph' of messages
     * @param messages list of messages to thread
     * @return the first message of the threaded root set, or null if
     *         messages is null or contains no non-dummy message
     * @since 2.2
     */
    public Threadable thread(List<? extends Threadable> messages) {
        return thread((Iterable<? extends Threadable>) messages);
    }

    /**
     * The client passes in an Iterable of Threadable objects, and
     * the Threader constructs a connected 'graph' of messages
     * @param messages iterable of messages to thread
     * @return the first message of the threaded root set, or null if
     *         messages is null or contains no non-dummy message
     * @throws RuntimeException if an internal consistency check on the container graph fails
     * @since 3.0
     */
    public Threadable thread(Iterable<? extends Threadable> messages) {
        if (messages == null) {
            return null;
        }

        idTable = new HashMap<String,ThreadContainer>();

        // walk through each Threadable element
        for (Threadable t : messages) {
            if (!t.isDummy()) {
                buildContainer(t);
            }
        }

        // Nothing to thread (empty input, or dummies only): the root set would be
        // empty, so report "no result" instead of walking an empty child list.
        if (idTable.isEmpty()) {
            idTable = null;
            return null;
        }

        root = findRootSet();
        idTable.clear();
        idTable = null;

        pruneEmptyContainers(root);

        root.reverseChildren();
        gatherSubjects();

        if (root.next != null) {
            throw new RuntimeException("root node has a next:" + root);
        }

        // Every member of the root set must expose a Threadable; give empty
        // containers a dummy derived from their first child.
        for (ThreadContainer r = root.child; r != null; r = r.next) {
            if (r.threadable == null) {
                r.threadable = r.child.threadable.makeDummy();
            }
        }

        Threadable result = (root.child == null ? null : root.child.threadable);
        root.flush();
        root = null;

        return result;
    }

    /**
     * Registers the given message in the id table and links its container to the
     * containers of the messages named in its references, creating empty
     * placeholder containers for references that have not been seen yet.
     *
     * @param threadable the message to add to the container graph
     * @throws RuntimeException if the container claims a parent whose child list does not contain it
     */
    private void buildContainer(Threadable threadable) {
        String id = threadable.messageThreadId();
        ThreadContainer container = idTable.get(id);

        // A ThreadContainer exists for this id already. This should be a forward reference, but may
        // be a duplicate id, in which case we will need to generate a bogus placeholder id
        if (container != null) {
            if (container.threadable != null) { // oops! duplicate ids...
                id = "<Bogus-id:" + (bogusIdCount++) + ">";
                container = null;
            } else {
                // The container just contained a forward reference to this message, so let's
                // fill in the threadable field of the container with this message
                container.threadable = threadable;
            }
        }

        // No container exists for that message Id. Create one and insert it into the hash table.
        if (container == null) {
            container = new ThreadContainer();
            container.threadable = threadable;
            idTable.put(id, container);
        }

        // Iterate through all of the references and create ThreadContainers for any references that
        // don't have them.
        ThreadContainer parentRef = null;
        for (String refString : threadable.messageThreadReferences()) {
            ThreadContainer ref = idTable.get(refString);

            // if this id doesn't have a container, create one
            if (ref == null) {
                ref = new ThreadContainer();
                idTable.put(refString, ref);
            }

            // Link references together in the order they appear in the References: header,
            // IF they don't have a parent already &&
            // IF it will not cause a circular reference
            if (parentRef != null
                    && ref.parent == null
                    && parentRef != ref
                    && !ref.findChild(parentRef)) {
                // Link ref into the parent's child list
                ref.parent = parentRef;
                ref.next = parentRef.child;
                parentRef.child = ref;
            }
            parentRef = ref;
        }

        // parentRef is now set to the container of the last element in the references field. Make that
        // be the parent of this container, unless doing so causes a circular reference
        if (parentRef != null
                && (parentRef == container || container.findChild(parentRef))) {
            parentRef = null;
        }

        // if it has a parent already, it's because we saw this message in a References: field, and presumed
        // a parent based on the other entries in that field. Now that we have the actual message, we can
        // throw away the old parent and use this new one
        if (container.parent != null) {
            ThreadContainer prev = null;
            ThreadContainer rest = container.parent.child;
            while (rest != null && rest != container) {
                prev = rest;
                rest = rest.next;
            }

            if (rest == null) {
                throw new RuntimeException(
                    "Didnt find "
                        + container
                        + " in parent"
                        + container.parent);
            }

            // Unlink this container from the parent's child list
            if (prev == null) {
                container.parent.child = container.next;
            } else {
                prev.next = container.next;
            }

            container.next = null;
            container.parent = null;
        }

        // If we have a parent, link container into the parent's child list
        if (parentRef != null) {
            container.parent = parentRef;
            container.next = parentRef.child;
            parentRef.child = container;
        }
    }

    /**
     * Find the root set of all existing ThreadContainers, i.e. every container
     * in the id table that has no parent.
     * @return a new ThreadContainer whose child list is the root set
     * @throws RuntimeException if a parentless container already has a sibling link
     */
    private ThreadContainer findRootSet() {
        ThreadContainer rootSet = new ThreadContainer();

        for (ThreadContainer c : idTable.values()) {
            if (c.parent == null) {
                if (c.next != null) {
                    throw new RuntimeException(
                        "c.next is " + c.next.toString());
                }
                c.next = rootSet.child;
                rootSet.child = c;
            }
        }
        return rootSet;
    }

    /**
     * Delete any empty or dummy ThreadContainers below the given parent,
     * promoting the children of removed containers where appropriate.
     * Does nothing when the parent has no children.
     * @param parent the container whose child list is pruned
     */
    private void pruneEmptyContainers(ThreadContainer parent) {
        // prev is the last container that was kept; it only advances when the
        // current container stays in the list.
        ThreadContainer prev = null;
        ThreadContainer container = parent.child;

        while (container != null) {
            ThreadContainer next = container.next;

            if (container.threadable == null && container.child == null) {
                // Empty and without any children: delete it
                if (prev == null) {
                    parent.child = container.next;
                } else {
                    prev.next = container.next;
                }
            } else if (container.threadable == null
                    && (container.parent != null || container.child.next == null)) {
                // Empty, with kids, and (not at root or only one kid):
                // we have an invalid/expired message with kids. Promote the kids to this level.
                ThreadContainer kids = container.child;

                // Remove this container and replace with 'kids'.
                if (prev == null) {
                    parent.child = kids;
                } else {
                    prev.next = kids;
                }

                // Make each child's parent be this level's parent, and make the last child's
                // next point to this container's next, i.e. splice kids into the list in
                // place of container
                ThreadContainer tail = kids;
                while (tail.next != null) {
                    tail.parent = container.parent;
                    tail = tail.next;
                }
                tail.parent = container.parent;
                tail.next = container.next;

                // Process the newly promoted items on the next pass
                next = kids;
            } else {
                // A container we keep; prune below it if it has kids
                if (container.child != null) {
                    pruneEmptyContainers(container);
                }
                prev = container;
            }

            container = next;
        }
    }

    /**
     * Returns the simplified subject that represents a root-set container.
     * A dummy container (no threadable) is represented by its first child.
     * @param c a member of the root set
     * @return the simplified subject reported by the representative message
     */
    private String subjectOf(ThreadContainer c) {
        Threadable threadable = c.threadable;
        if (threadable == null) {
            threadable = c.child.threadable;
        }
        return threadable.simplifiedSubject();
    }

    /**
     * If any two members of the root set have the same subject, merge them.
     * This is to attempt to accommodate messages without References: headers.
     * Messages with a null or empty simplified subject are never merged.
     */
    private void gatherSubjects() {
        int count = 0;
        for (ThreadContainer c = root.child; c != null; c = c.next) {
            count++;
        }

        // Capacity and load factor are chosen so that 'count' entries stay below the resize threshold
        HashMap<String, ThreadContainer> subjectTable =
            new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9);

        for (ThreadContainer c = root.child; c != null; c = c.next) {
            String subj = subjectOf(c);

            // Compare by content: an empty subject is not necessarily the interned "" literal
            if (subj == null || subj.length() == 0) {
                continue;
            }

            ThreadContainer old = subjectTable.get(subj);

            // Add this container to the table iff:
            // - There exists no container with this subject
            // - or this is a dummy container and the old one is not - the dummy one is
            //   more interesting as a root, so put it in the table instead
            // - The container in the table has a "Re:" version of this subject, and
            //   this container has a non-"Re:" version of this subject. The non-"Re:" version
            //   is the more interesting of the two.
            if (old == null
                    || (c.threadable == null && old.threadable != null)
                    || (old.threadable != null
                        && old.threadable.subjectIsReply()
                        && c.threadable != null
                        && !c.threadable.subjectIsReply())) {
                subjectTable.put(subj, c);
            }
        }

        // If the table is empty, we're done
        if (subjectTable.isEmpty()) {
            return;
        }

        // subjectTable is now populated with one entry for each subject which occurs in the
        // root set. Iterate over the root set, and gather together the difference.
        // prev is the last container left in the root set; it does not advance past merged ones.
        ThreadContainer prev = null;
        ThreadContainer rest;
        for (ThreadContainer c = root.child; c != null; c = rest) {
            rest = c.next;

            String subj = subjectOf(c);

            // Don't thread together all subjectless messages
            if (subj == null || subj.length() == 0) {
                prev = c;
                continue;
            }

            ThreadContainer old = subjectTable.get(subj);

            if (old == c) { // That's us
                prev = c;
                continue;
            }

            // We have now found another container in the root set with the same subject
            // Remove the "second" message from the root set
            if (prev == null) {
                root.child = c.next;
            } else {
                prev.next = c.next;
            }
            c.next = null;

            if (old.threadable == null && c.threadable == null) {
                // both dummies - merge them by appending c's kids to old's kids
                ThreadContainer tail = old.child;
                while (tail != null && tail.next != null) {
                    tail = tail.next;
                }

                if (tail != null) { // protect against possible NPE
                    tail.next = c.child;
                }

                for (tail = c.child; tail != null; tail = tail.next) {
                    tail.parent = old;
                }

                c.child = null;
            } else if (old.threadable == null
                    || (c.threadable != null
                        && c.threadable.subjectIsReply()
                        && !old.threadable.subjectIsReply())) {
                // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old
                c.parent = old;
                c.next = old.child;
                old.child = c;
            } else {
                // else make the old and new messages be children of a new dummy container.
                // We create a new container object for old.msg and empty the old container
                ThreadContainer newc = new ThreadContainer();
                newc.threadable = old.threadable;
                newc.child = old.child;

                for (ThreadContainer tail = newc.child; tail != null; tail = tail.next) {
                    tail.parent = newc;
                }

                old.threadable = null;
                old.child = null;

                c.parent = old;
                newc.parent = old;

                // Old is now a dummy - give it 2 kids, c and newc
                old.child = c;
                c.next = newc;
            }
            // We've done a merge, so prev stays where it is
        }
    }


    // DEPRECATED METHODS - for API compatibility only - DO NOT USE

    /**
     * The client passes in an array of Threadable objects, and
     * the Threader constructs a connected 'graph' of messages
     * @param messages array of messages to thread
     * @return the first message of the threaded root set, or null if
     *         messages is null or contains no non-dummy message
     * @deprecated (2.2) prefer {@link #thread(List)}
     */
    @Deprecated
    public Threadable thread(Threadable[] messages) {
        if (messages == null) {
            return null;
        }
        return thread(java.util.Arrays.asList(messages));
    }

}