001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    
019    package org.apache.commons.net.nntp;
020    
021    /**
022     * This is an implementation of a message threading algorithm, as originally devised by Jamie Zawinski.
023     * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
024     * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
025     *
026     * @author rwinston <rwinston@checkfree.com>
027     *
028     */
029    
030    import java.util.HashMap;
031    import java.util.Iterator;
032    import java.util.List;
033    
034    public class Threader {
035        private ThreadContainer root;
036        private HashMap<String,ThreadContainer> idTable;
037        private int bogusIdCount = 0;
038    
039        /**
040         * The client passes in a list of Threadable objects, and
041         * the Threader constructs a connected 'graph' of messages
042         * @param messages list of messages to thread
043         * @return null if messages == null or root.child == null
044         * @since 2.2
045         */
046        public Threadable thread(List<? extends Threadable> messages) {
047            return thread((Iterable<? extends Threadable>)messages);
048        }
049    
050        /**
051         * The client passes in a list of Iterable objects, and
052         * the Threader constructs a connected 'graph' of messages
053         * @param messages iterable of messages to thread
054         * @return null if messages == null or root.child == null
055         * @since 3.0
056         */
057        public Threadable thread(Iterable<? extends Threadable> messages) {
058            if (messages == null)
059                return null;
060    
061            idTable = new HashMap<String,ThreadContainer>();
062    
063            // walk through each Threadable element
064            for (Threadable t : messages) {
065                if (!t.isDummy())
066                    buildContainer(t);
067            }
068    
069            root = findRootSet();
070            idTable.clear();
071            idTable = null;
072    
073            pruneEmptyContainers(root);
074    
075            root.reverseChildren();
076            gatherSubjects();
077    
078            if (root.next != null)
079                throw new RuntimeException("root node has a next:" + root);
080    
081            for (ThreadContainer r = root.child; r != null; r = r.next) {
082                if (r.threadable == null)
083                    r.threadable = r.child.threadable.makeDummy();
084            }
085    
086            Threadable result = (root.child == null ? null : root.child.threadable);
087            root.flush();
088            root = null;
089    
090            return result;
091        }
092    
093        /**
094         *
095         * @param threadable
096         */
097        private void buildContainer(Threadable threadable) {
098            String id = threadable.messageThreadId();
099            ThreadContainer container = idTable.get(id);
100    
101            // A ThreadContainer exists for this id already. This should be a forward reference, but may
102            // be a duplicate id, in which case we will need to generate a bogus placeholder id
103            if (container != null) {
104                if (container.threadable != null) { // oops! duplicate ids...
105                    id = "<Bogus-id:" + (bogusIdCount++) + ">";
106                    container = null;
107                } else {
108                    // The container just contained a forward reference to this message, so let's
109                    // fill in the threadable field of the container with this message
110                    container.threadable = threadable;
111                }
112            }
113    
114            // No container exists for that message Id. Create one and insert it into the hash table.
115            if (container == null) {
116                container = new ThreadContainer();
117                container.threadable = threadable;
118                idTable.put(id, container);
119            }
120    
121            // Iterate through all of the references and create ThreadContainers for any references that
122            // don't have them.
123            ThreadContainer parentRef = null;
124            {
125                String[] references = threadable.messageThreadReferences();
126                for (int i = 0; i < references.length; ++i) {
127                    String refString = references[i];
128                    ThreadContainer ref = idTable.get(refString);
129    
130                    // if this id doesnt have a container, create one
131                    if (ref == null) {
132                        ref = new ThreadContainer();
133                        idTable.put(refString, ref);
134                    }
135    
136                    // Link references together in the order they appear in the References: header,
137                    // IF they dont have a have a parent already &&
138                    // IF it will not cause a circular reference
139                    if ((parentRef != null)
140                        && (ref.parent == null)
141                        && (parentRef != ref)
142                        && !(ref.findChild(parentRef))) {
143                        // Link ref into the parent's child list
144                        ref.parent = parentRef;
145                        ref.next = parentRef.child;
146                        parentRef.child = ref;
147                    }
148                    parentRef = ref;
149                }
150            }
151    
152            // parentRef is now set to the container of the last element in the references field. make that
153            // be the parent of this container, unless doing so causes a circular reference
154            if (parentRef != null
155                && (parentRef == container || container.findChild(parentRef)))
156                parentRef = null;
157    
158            // if it has a parent already, its because we saw this message in a References: field, and presumed
159            // a parent based on the other entries in that field. Now that we have the actual message, we can
160            // throw away the old parent and use this new one
161            if (container.parent != null) {
162                ThreadContainer rest, prev;
163    
164                for (prev = null, rest = container.parent.child;
165                    rest != null;
166                    prev = rest, rest = rest.next) {
167                    if (rest == container)
168                        break;
169                }
170    
171                if (rest == null) {
172                    throw new RuntimeException(
173                        "Didnt find "
174                            + container
175                            + " in parent"
176                            + container.parent);
177                }
178    
179                // Unlink this container from the parent's child list
180                if (prev == null)
181                    container.parent.child = container.next;
182                else
183                    prev.next = container.next;
184    
185                container.next = null;
186                container.parent = null;
187            }
188    
189            // If we have a parent, link container into the parents child list
190            if (parentRef != null) {
191                container.parent = parentRef;
192                container.next = parentRef.child;
193                parentRef.child = container;
194            }
195        }
196    
197        /**
198         * Find the root set of all existing ThreadContainers
199         * @return root the ThreadContainer representing the root node
200         */
201        private ThreadContainer findRootSet() {
202            ThreadContainer root = new ThreadContainer();
203            Iterator<String> iter = idTable.keySet().iterator();
204    
205            while (iter.hasNext()) {
206                Object key = iter.next();
207                ThreadContainer c = idTable.get(key);
208                if (c.parent == null) {
209                    if (c.next != null)
210                        throw new RuntimeException(
211                            "c.next is " + c.next.toString());
212                    c.next = root.child;
213                    root.child = c;
214                }
215            }
216            return root;
217        }
218    
219        /**
220         * Delete any empty or dummy ThreadContainers
221         * @param parent
222         */
223        private void pruneEmptyContainers(ThreadContainer parent) {
224            ThreadContainer container, prev, next;
225            for (prev = null, container = parent.child, next = container.next;
226                container != null;
227                prev = container,
228                    container = next,
229                    next = (container == null ? null : container.next)) {
230    
231                // Is it empty and without any children? If so,delete it
232                if (container.threadable == null && container.child == null) {
233                    if (prev == null)
234                        parent.child = container.next;
235                    else
236                        prev.next = container.next;
237    
238                    // Set container to prev so that prev keeps its same value the next time through the loop
239                    container = prev;
240                }
241    
242                // Else if empty, with kids, and (not at root or only one kid)
243                else if (
244                    container.threadable == null
245                        && container.child != null
246                        && (container.parent != null
247                            || container.child.next == null)) {
248                    // We have an invalid/expired message with kids. Promote the kids to this level.
249                    ThreadContainer tail;
250                    ThreadContainer kids = container.child;
251    
252                    // Remove this container and replace with 'kids'.
253                    if (prev == null)
254                        parent.child = kids;
255                    else
256                        prev.next = kids;
257    
258                    // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
259                    // i.e. splice kids into the list in place of container
260                    for (tail = kids; tail.next != null; tail = tail.next)
261                        tail.parent = container.parent;
262    
263                    tail.parent = container.parent;
264                    tail.next = container.next;
265    
266                    // next currently points to the item after the inserted items in the chain - reset that so we process the newly
267                    // promoted items next time round
268                    next = kids;
269    
270                    // Set container to prev so that prev keeps its same value the next time through the loop
271                    container = prev;
272                } else if (container.child != null) {
273                    // A real message , with kids
274                    // Iterate over the children
275                    pruneEmptyContainers(container);
276                }
277            }
278        }
279    
280        /**
281         *  If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
282         */
283        private void gatherSubjects() {
284    
285            int count = 0;
286    
287            for (ThreadContainer c = root.child; c != null; c = c.next)
288                count++;
289    
290            // TODO verify this will avoid rehashing
291            HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9);
292            count = 0;
293    
294            for (ThreadContainer c = root.child; c != null; c = c.next) {
295                Threadable threadable = c.threadable;
296    
297                // No threadable? If so, it is a dummy node in the root set.
298                // Only root set members may be dummies, and they alway have at least 2 kids
299                // Take the first kid as representative of the subject
300                if (threadable == null)
301                    threadable = c.child.threadable;
302    
303                String subj = threadable.simplifiedSubject();
304    
305                if (subj == null || subj == "")
306                    continue;
307    
308                ThreadContainer old = subjectTable.get(subj);
309    
310                // Add this container to the table iff:
311                // - There exists no container with this subject
312                // - or this is a dummy container and the old one is not - the dummy one is
313                // more interesting as a root, so put it in the table instead
314                // - The container in the table has a "Re:" version of this subject, and
315                // this container has a non-"Re:" version of this subject. The non-"Re:" version
316                // is the more interesting of the two.
317                if (old == null
318                    || (c.threadable == null && old.threadable != null)
319                    || (old.threadable != null
320                        && old.threadable.subjectIsReply()
321                        && c.threadable != null
322                        && !c.threadable.subjectIsReply())) {
323                    subjectTable.put(subj, c);
324                    count++;
325                }
326            }
327    
328            // If the table is empty, we're done
329            if (count == 0)
330                return;
331    
332            // subjectTable is now populated with one entry for each subject which occurs in the
333            // root set. Iterate over the root set, and gather together the difference.
334            ThreadContainer prev, c, rest;
335            for (prev = null, c = root.child, rest = c.next;
336                c != null;
337                prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
338                Threadable threadable = c.threadable;
339    
340                // is it a dummy node?
341                if (threadable == null)
342                    threadable = c.child.threadable;
343    
344                String subj = threadable.simplifiedSubject();
345    
346                // Dont thread together all subjectless messages
347                if (subj == null || subj == "")
348                    continue;
349    
350                ThreadContainer old = subjectTable.get(subj);
351    
352                if (old == c) // That's us
353                    continue;
354    
355                // We have now found another container in the root set with the same subject
356                // Remove the "second" message from the root set
357                if (prev == null)
358                    root.child = c.next;
359                else
360                    prev.next = c.next;
361                c.next = null;
362    
363                if (old.threadable == null && c.threadable == null) {
364                    // both dummies - merge them
365                    ThreadContainer tail;
366                    for (tail = old.child;
367                        tail != null && tail.next != null;
368                        tail = tail.next){}
369    
370                    if (tail != null) { // protect against possible NPE
371                        tail.next = c.child;
372                    }
373    
374                    for (tail = c.child; tail != null; tail = tail.next)
375                        tail.parent = old;
376    
377                    c.child = null;
378                } else if (
379                    old.threadable == null
380                        || (c.threadable != null
381                            && c.threadable.subjectIsReply()
382                            && !old.threadable.subjectIsReply())) {
383                    // Else if old is empty, or c has "Re:" and old does not  ==> make this message a child of old
384                    c.parent = old;
385                    c.next = old.child;
386                    old.child = c;
387                } else {
388                    // else make the old and new messages be children of a new dummy container.
389                    // We create a new container object for old.msg and empty the old container
390                    ThreadContainer newc = new ThreadContainer();
391                    newc.threadable = old.threadable;
392                    newc.child = old.child;
393    
394                    for (ThreadContainer tail = newc.child;
395                        tail != null;
396                        tail = tail.next)
397                        tail.parent = newc;
398    
399                    old.threadable = null;
400                    old.child = null;
401    
402                    c.parent = old;
403                    newc.parent = old;
404    
405                    // Old is now a dummy- give it 2 kids , c and newc
406                    old.child = c;
407                    c.next = newc;
408                }
409                // We've done a merge, so keep the same prev
410                c = prev;
411            }
412    
413            subjectTable.clear();
414            subjectTable = null;
415    
416        }
417    
418    
419        // DEPRECATED METHODS - for API compatibility only - DO NOT USE
420    
421        /**
422         * The client passes in an array of Threadable objects, and
423         * the Threader constructs a connected 'graph' of messages
424         * @param messages array of messages to thread
425         * @return null if messages == null or root.child == null
426         * @deprecated (2.2) prefer {@link #thread(List)}
427         */
428        @Deprecated
429        public Threadable thread(Threadable[] messages) {
430            return thread(java.util.Arrays.asList(messages));
431        }
432    
433    }