CSC/ECE 517 Summer 2008/wiki1 4 wm: Difference between revisions

From Expertiza_Wiki
Jump to navigation Jump to search
No edit summary
Line 226: Line 226:
==Java vs. Ruby Thread Efficiency==
==Java vs. Ruby Thread Efficiency==
In order to test the efficiency of Java and Ruby threads, a program was written in each language that reads in the contents of five different text files, searches for a given word in each, and counts each occurrence of the word.  This count is then added to a total count for all five files.  Each file is passed to its own thread to run.  The main thread has the total count, which is updated using mutual exclusion techniques in each language.  The time each program took to run depended on the size of the files.  The results are recorded in Table 4.  With very small files, the Ruby code ran twice as fast as the Java code.  With the medium-sized files, the Java code ran twice as fast as the Ruby code.  With the larger files, the Java code ran three times faster.  Certainly, the Java threads outperform the Ruby threads with I/O-intensive code.  The Java code is in Figure 8 and the Ruby code is in Figure 9.
In order to test the efficiency of Java and Ruby threads, a program was written in each language that reads in the contents of five different text files, searches for a given word in each, and counts each occurrence of the word.  This count is then added to a total count for all five files.  Each file is passed to its own thread to run.  The main thread has the total count, which is updated using mutual exclusion techniques in each language.  The time each program took to run depended on the size of the files.  The results are recorded in Table 4.  With very small files, the Ruby code ran twice as fast as the Java code.  With the medium-sized files, the Java code ran twice as fast as the Ruby code.  With the larger files, the Java code ran three times faster.  Certainly, the Java threads outperform the Ruby threads with I/O-intensive code.  The Java code is in Figure 8 and the Ruby code is in Figure 9.
'''Figure 8: Java Word Counts Code'''
    public class WordCounts {
        protected static int wordCount;
       
        private class Semaphore {
            private int mutex; 
     
            public Semaphore () {
                mutex = 1;
            }
     
            public synchronized void acquire() throws InterruptedException {
                while (mutex == 0) {
                  wait();
                }
                mutex--;
            }
            public synchronized void release() {
                mutex++;   
                notifyAll();
            }
        }
   
        private class WordCountsThread implements Runnable {
            private String word;
            private String file;       
            private int count;
            private Semaphore total;
                 
            public WordCountsThread (String word, String file, Semaphore total) {
                this.word  = word;
                this.file  = file;
                this.total = total;
                count      = 0;
            }   
   
            public void run(){
                readWordFile();
       
                try {
                    total.acquire();
                    wordCount = wordCount + count;
                    total.release();
                } catch (InterruptedException e) {
                    System.out.println (e);
                }
            }
   
            private void readWordFile (){
            }  // end method
        } //end inner class: WordCountThread
   
   
        public WordCounts (String searchWord, String [] files) {
            int numFiles = files.length;
            Semaphore total = new Semaphore();
            Thread [] threads = new Thread [numFiles];
         
            for (int i = 0; i < numFiles; i++ ) {
                threads[i] = new Thread (new WordCountsThread (searchWord,files[i], total));
            }
       
            for (int j = 0; j < numFiles; j++ ) {
                threads[j].start();
            }
       
            try {
                for (int i = 0; i < numFiles; i++ ) {
                    threads[i].join();
                }
            } catch (InterruptedException e) {
                System.out.println (e);
            }
        }
       
        public void displayTotal (String searchWord) {
            System.out.println ("There are " + wordCount +
                            " total occurrences of " + searchWord);
        }
   
        public static void main (String [] args) {
            String [] files;             
            files = new String [5]; 
            files [0] = "Player.java";
            files [1] = "KingMove.java";
            files [2] = "NodeDeque.java";
            files [3] = "Position.java";
            files [4] = "DoubleDigest.java";
            String searchWord = "public";
            long time = System.currentTimeMillis (), time_prev = time;
            WordCounts wc = new WordCounts (searchWord, files);
            wc.displayTotal (searchWord);
            time = System.currentTimeMillis ();
            System.out.println ("Diff " + (time - time_prev) + " msecs");
            System.exit(0);
        }
    }

Revision as of 17:50, 6 June 2008

Introduction

A thread is a basic unit of CPU utilization. A traditional process has a single thread of control. Many modern operating systems provide features enabling a process to contain multiple threads of control. If the process has multiple threads of control, it can do more than one task at a time, such as, displaying graphics and reading keystrokes. Threads are used mainly to run asynchronous tasks and pass the application’s tasks to an executor. They are useful because they reduce the overall time of execution of programs. All threads belonging to the same process share its code section, data section and other operating system resources, such as open files and signals. Thus multithreading is more efficient than having parallel processes running for the same program, which require a huge overhead.

Multi-threaded Programming

Multithreading support for single processors works by giving each of the threads a “time slice” of the CPU, similar to the parallel execution of processes. On multi-core machines, different threads can run on the different processors and the threads truly run simultaneously. Support for threads may be provided at either the user level, for user or green threads, or by the kernel, for kernel or native threads. User threads are supported above the kernel and are managed without kernel support, whereas kernel threads are supported and managed directly by the operating system.

Multi-threading Models

There are three common multithreading models

One-to-One

This model maps each user-thread to a kernel thread. It provides more concurrency than the many-to-one model by allowing another thread to run when a thread makes a blocking system call. It also allows multiple threads to run in parallel on multiprocessors. The main disadvantage of this model is the overhead of creating each kernel thread for each user thread. This burdens the performance of an application and limits the number of threads supported by the system.

Many-to-One

This model maps many user-level threads to one kernel thread. It is the model used by green threads. Green threads are scheduled by a Virtual Machine instead of natively by the OS. They emulate multithreaded environments without relying on any native OS capabilities. These user-level threads are lightweight and very efficient. They are an easy way to achieve parallelism in a program. An application can have as many user-level threads as it needs, but true concurrency is not achieved because the kernel can schedule only one thread at a time. The developers can create as many user threads as necessary. However, there are two main disadvantages: 1) If a thread makes a blocking system call, the entire process will block. 2) If you are running on a multi-core machine, the multiple threads are unable to run in parallel.

Many-to-Many

This model multiplexes many user-level threads to a smaller or equal number of kernel threads. This model has the best of both worlds. Developers can create as many user threads as they need. When a user thread performs a blocking system call, the kernel can schedule another thread for execution and the kernel threads can run in parallel on multi-core systems.

Benefits of Multi-threading

Resource sharing

Memory, code and process resource sharing - allocating memory and resources for process creation is costly and time consuming. It is more economical to create threads than new processes. Applications can benefit by having several different threads of activity within the same address space.

Responsiveness

Multithreading an interactive application may allow a program to continue running, even if part of it is blocked doing IO or performing a lengthy operation, thereby increasing responsiveness to the user. A web browser might have one thread display images or text while another thread retrieves data from the network. A busy web server may have many clients concurrently accessing it. If the web server ran as a traditional single-threaded process, it would be able to service only one client at a time.

Potential Problems of Multi-threading

Many threaded programs have shared resources that must be accessed by more than one thread. These programs must implement a form of mutual exclusion to ensure that only one of the threads can access the resource at a time. The code that accesses these shared resources is called a critical section. These critical sections must be protected by either hardware or software means.

Java Threads

The Java programming language has a thread library for creating and managing threads at the user level. The Java virtual machine (JVM) manages the mapping of the user threads to the operating system kernel threads. The particular mapping model that is used depends on the operating system on which the Java program is running. Java’s Thread Scheduler monitors all the threads running in Java programs. It decides which threads to run at any given time and when to switch between threads based on each thread’s priority, with higher-priority threads running before lower-priority threads. The Java scheduler uses either preemptive or non-preemptive scheduling based on the operating system on which it is running. With preemptive scheduling, each thread is given a constant period of time to run, after which time the thread will be suspended() to allow the next thread to resume() running. With non-preemptive scheduling, the running thread is allowed to run until the thread completes or until it issues a yield() to allow other threads to run while it waits for some other processing to occur. A Java thread can be created either by having your Java class extend the Java Thread class or by having your Java class implement the Runnable interface and coding a run() method. The two options are shown in Figures 1 and 2. The preferred method for creating a thread is by implementing the Runnable interface. It is more flexible and useful in complex applications. Java does not support multiple inheritance, but a class may implement multiple interfaces. By using the interface method, the Client class remains free to inherit from another class, should the need arise in the future.

Figure 1: Client Extends Thread Class

   /** Thread subclass whose body prints one line identifying the client thread. */
   public class Client extends Thread {
       /** Executed on the new thread after {@code start()} is called on this object. */
       @Override
       public void run() {
           // Plain ASCII quotes: typographic quotes are not valid string delimiters in Java.
           System.out.println("Client thread");
       }
   }
   public class Server {
       public static void main(String args[]){
           Thread runner = new Client();
           runner.start();
           System.out.println(“Server thread”);
       }
   }


Figure 2: Client Implements Runnable Interface

   /** Runnable task whose body prints one line identifying the client thread. */
   public class Client implements Runnable {
       /** Executed by whichever thread this task is handed to. */
       @Override
       public void run() {
           // Plain ASCII quotes: typographic quotes are not valid string delimiters in Java.
           System.out.println("Client thread");
       }
   }
  
   public class Server {
   
       public static void main(String args[]) {
           Thread runner = new Thread(new Client());
           runner.start();
           System.out.println(“Server thread”);
       }
   }

If the main Java thread wants to wait for any threads it creates to finish their run() method, the thread method join() can be executed. The join() method is useful in situations where the creating thread can continue only after a worker thread has completed. Java threads can be asynchronously terminated using the stop() method. However, this method has been deprecated and its use is discouraged. The preferred cancellation technique is to call the interrupt() method on the target thread and have the target thread periodically check whether it should terminate.

States of Java Threads

New

A thread is in this state when the thread object is first created with the new operator.

Runnable

Calling the start() method allocates memory for the new thread in the JVM and calls the run() method for the thread object.

Blocked

A thread becomes blocked if it performs a blocking statement, such as doing I/O or if it invokes a sleep() method.

Dead

A thread moves to the dead state when its run() method terminates.

Mutual Exclusion

The Java language implements mutual exclusion in a couple of ways. There exists a MutualExclusion interface. The Client class can implement this interface and code the methods, enteringCriticalSection() and leavingCriticalSection(), as shown in Figure 3. The yield() method tells the Thread Scheduler to allow another thread to run.

Figure 3: Client Implements MutualExclusion Interface

   public class Client implements MutualExclusion{
   
       public Client(){
           flag0 = false; flag1 = false; turn = TURN_0;
       }
       
       public void enteringCriticalSection(int t) {
           int other = 1 - t;
           
           if (t == 0) {
               flag0 = true; turn = other;
               while ((flag1 == true) && (turn == other))
                   Thread.yield();
            } else {
               flag1 = true; turn = other;
    	        while ((flag0 == true) && (turn == other))
                   Thread.yield();
            }
       }
       
       public void leavingCriticalSection(int t) {
           if(t == 0) flag0 = false;
           else   flag1 = false;
       }
       private volatile int turn, boolean flag0, flag1;
   }

An alternative method of handling mutual exclusion is to code a synchronization tool called a semaphore. A semaphore is a variable that is accessed only through two standard operations: acquire() and release(). The semaphore is acquired, then the critical section of an object is entered and after the critical code completes, the semaphore is released. An example of this code is in Figure 4.

Figure 4: Client Has Semaphore

   /**
    * Task that runs its critical section only while holding a shared semaphore.
    * Semaphore and SleepUtilities are not declared here; this class only relies on
    * acquire(), release() and nap(int) being callable as shown.
    */
   public class Client implements Runnable {
       private final Semaphore sem;

       public Client (Semaphore sem) {
           this.sem = sem;
       }

       /** Acquires the semaphore, runs the critical section, and always releases the semaphore. */
       @Override
       public void run() {
           sem.acquire();
           try {
               System.out.println("Entering critical section");
               SleepUtilities.nap(3);   // presumably pauses the thread - confirm in SleepUtilities
               System.out.println("Leaving critical section");
           } finally {
               // Release even if the critical section throws, so other clients are not blocked forever.
               sem.release();
           }
       }
   }

Ruby Threads

At the present time the Ruby programming language supports only green threads. These are totally implemented within the Ruby interpreter. The threads, as well as the thread scheduler, run on the same single operating system thread. Ruby can support thread methods, such as stop() and kill(), that are not advised for kernel threads. However, Ruby threads suffer from the drawbacks of non-native threads. They can only run on one processor in a multiprocessor environment. It is possible for a single thread to cause the whole process to deadlock, if poorly designed. The worst problem with Ruby threads is with I/O, especially network access, since it has the potential to block a process for a long time. This problem is solvable by using non-blocking I/O. However, one problem that is not solvable using non-blocking I/O is the DNS lookup system call. Ruby solves this problem with its resolv library, which was written specifically to address the threading issue with the standard OS DNS lookup mechanism. See Figure 5 for an example of the code that handles this issue. Despite some of the green thread concerns, for most situations, the benefits of efficiency can far outweigh the disadvantages.

Figure 5: Ruby DNS Lookup Implementation

   # Runs a counter thread alongside a DNS lookup; a non-zero count afterwards
   # shows the counter thread was able to run while the lookup was in progress.
   require 'socket'
   require 'resolv-replace'
   count = 0
   Thread.critical = true
   # The counter thread yields once, then increments count in an endless loop.
   thread = Thread.new { Thread.pass; loop { count += 1; } }
   IPSocket.getaddress("www.ruby-lang.org")
   count


A Ruby thread is created using Thread.new() {block}. The arguments given in the new() method are passed to the block code that the thread executes. See Figure 6 for an example of creating a Ruby thread. The Ruby pass() method is analogous to the Java yield() method. It tells the thread scheduler to pass execution to another thread. Ruby has a join() method that performs the same functionality as the Java join(). There are run() and wakeup() methods that wake up a sleeping thread, either giving it control or indicating that it is ready to be scheduled.

Figure 6: Creating Ruby Threads

   # Starts four threads, each printing the number it was given, then waits for all of them.
   threads = []
   4.times do |number|
       # number is passed through Thread.new so each thread receives its own copy as i.
       threads << Thread.new(number) do |i|
           print "#{i}\n"
       end
   end
   threads.each { |t| t.join }

States of Ruby Threads

Run

The thread is executing.

Sleep

The thread is sleeping or waiting on I/O.

Aborting

The thread is aborting (has been killed).

False

The thread has terminated normally.

Nil

The thread has terminated with an exception.

Mutual Exclusion

The best way to handle mutual exclusion in Ruby is with the Monitor class. A monitor has a synchronize method that prevents access to a resource by another thread while one thread is executing in that method. Figure 7 shows an example of how this can be used.

Figure 7: Ruby Monitor

   require 'monitor'
   # Tally whose tick method increments the count inside the monitor's synchronize block.
   class Counter < Monitor
       attr_reader :count

       def initialize
           @count = 0
           super
       end

       def tick
           synchronize { @count += 1 }
       end
   end
   # Two threads tick the same counter 10000 times each; both are joined before reading it.
   counter = Counter.new
   workers = []
   2.times do
       workers << Thread.new { 10000.times { counter.tick } }
   end
   workers.each { |w| w.join }
   counter.count

Java vs Ruby Threads

Java thread initialization is more involved than Ruby thread initialization. Ruby threads also have the advantage of a “quick and dirty” thread creation, since it only takes a couple of lines of code. A Ruby thread shares all global, instance and local variables that are in existence at the time the thread starts. A Java thread can share these variables, as well, depending on how the thread was created. If the client thread class uses inheritance, any variables needed are passed along when creating the thread. If the thread is created through the interface, the variables can be shared as long as the class has been initialized and the variables are in use.

Ruby’s green threads are completely portable, as they don’t rely on the OS. But, on a multi-core processor, native thread implementations can assign work to multiple processors while green threads cannot. In this environment native threads have a huge advantage as more work is done by the native threads.

Java and Ruby Methods Comparison

Table 1 lists thread actions and Java and Ruby’s equivalent methods of achieving them.

Table 2 lists thread actions available in Java, but Ruby doesn’t have a method for the action.


Table 3 lists thread actions available in Ruby, but Java doesn’t have a method for the action.

There are many similarities between the Java and Ruby Thread classes, as shown with the equivalent methods in Table 1. But, as you can see, there are also many methods in Java that Ruby doesn't support and vice versa. Some methods like activeCount(), where it returns the number of active threads, are trivial, but there is a group of methods existing in both Java and Ruby that the other doesn't support. For Java, it is exception handling with getting/setting exceptions, while in Ruby, it is more thread control. Ruby has exception methods, but they are geared more toward events prior to the exception, like making sure all threads abort when exception starts and firing an exception to destroy threads. Java is more about setting the exceptions and getting them to change accordingly. So the Java Thread class has a little bit more exception handling than Ruby.

Ruby has more thread control for the programmer such as pausing, terminating, aborting and resuming threads. Java did have these functions, but they all have been deprecated because, although they give the user more control, they also can cause deadlock problems. So it is a tradeoff between more thread control and less chance for deadlocks to happen. Java does use interrupt to "pause" the thread as opposed to actually pausing the thread as in Ruby. Also, Java threads "sleep" differently than Ruby threads. Java sleeps are timed, while Ruby's sleep just stops the thread indefinitely, until it gets woken up by "thr.wakeup". As you can see, Ruby's thread methods are more flexible, but potentially more dangerous, than Java’s.

Basically, Java and Ruby both have similar thread functionalities. In some respects, Java is simpler by reducing user error, while Ruby gives the programmer more power and perhaps more headaches. If the user wants more control, then Ruby threading is great, but for simplicity Java is better. The main distinction between Ruby threads and Java threads is that currently Ruby supports only green threads (as did Java 1.1) and Java has support for kernel threads. Currently, there is no clear “winner” between native and green threads in a uni-processor system, but Java threads definitely have the edge over Ruby in a multi-processor environment. Ruby is moving from green threads to kernel threads in Ruby 1.9 or 2.0, which are still development releases. YARV has been integrated as the new Ruby VM. YARV will give Ruby kernel thread support. Detailed discussions of this change are at the following links: http://www.infoq.com/news/2007/05/ruby-threading-futures http://expressica.com/2008/04/26/new-in-ruby-19-threads/

Java vs. Ruby Thread Efficiency

In order to test the efficiency of Java and Ruby threads, a program was written in each language that reads in the contents of five different text files, searches for a given word in each, and counts each occurrence of the word. This count is then added to a total count for all five files. Each file is passed to its own thread to run. The main thread has the total count, which is updated using mutual exclusion techniques in each language. The time each program took to run depended on the size of the files. The results are recorded in Table 4. With very small files, the Ruby code ran twice as fast as the Java code. With the medium-sized files, the Java code ran twice as fast as the Ruby code. With the larger files, the Java code ran three times faster. Certainly, the Java threads outperform the Ruby threads with I/O-intensive code. The Java code is in Figure 8 and the Ruby code is in Figure 9.


Figure 8: Java Word Counts Code

   /**
    * Counts the occurrences of a search word across several files, scanning each
    * file on its own thread and folding the per-file counts into one shared total.
    *
    * <p>The shared total is the static field {@code wordCount}; it is never reset,
    * so constructing a second {@code WordCounts} adds to the previous total.
    */
   public class WordCounts {
       /** Running total of matches; updated only while the workers' shared semaphore is held. */
       protected static int wordCount;

       /**
        * Minimal binary semaphore built on the instance's intrinsic monitor.
        * {@code mutex} is 1 when the semaphore is free and 0 when it is held.
        */
       private static class Semaphore {
           private int mutex;

           public Semaphore () {
               mutex = 1;
           }

           /**
            * Blocks until the semaphore is free, then takes it.
            *
            * @throws InterruptedException if the calling thread is interrupted while waiting
            */
           public synchronized void acquire() throws InterruptedException {
               // Re-check after every wake-up: release() wakes all waiters but only one may proceed.
               while (mutex == 0) {
                   wait();
               }
               mutex--;
           }

           /** Frees the semaphore and wakes every waiter so one of them can take it. */
           public synchronized void release() {
               mutex++;
               notifyAll();
           }
       }

       /** Worker that counts the occurrences of one word in one file. */
       private static class WordCountsThread implements Runnable {
           private final String word;
           private final String file;
           private final Semaphore total;
           private int count;

           public WordCountsThread (String word, String file, Semaphore total) {
               this.word  = word;
               this.file  = file;
               this.total = total;
               count      = 0;
           }

           /** Scans the file, then adds this worker's count to the shared total under the semaphore. */
           @Override
           public void run() {
               readWordFile();

               try {
                   total.acquire();
               } catch (InterruptedException e) {
                   System.out.println (e);
                   // Keep the interrupt visible to whoever owns this thread.
                   Thread.currentThread().interrupt();
                   return;
               }
               try {
                   wordCount = wordCount + count;
               } finally {
                   // Always hand the semaphore back, or the remaining workers would wait forever.
                   total.release();
               }
           }

           /**
            * Reads the file line by line and adds every non-overlapping, case-sensitive
            * occurrence of the word to {@code count}. Matching is by substring, so the
            * word is also counted inside longer words. An unreadable file is reported
            * on standard output and contributes whatever was counted before the failure.
            */
           private void readWordFile () {
               if (word == null || word.length() == 0 || file == null) {
                   return;
               }
               java.io.BufferedReader reader = null;
               try {
                   reader = new java.io.BufferedReader (new java.io.FileReader (file));
                   String line;
                   while ((line = reader.readLine()) != null) {
                       int from = 0;
                       int at;
                       while ((at = line.indexOf (word, from)) >= 0) {
                           count++;
                           from = at + word.length();
                       }
                   }
               } catch (java.io.IOException e) {
                   System.out.println (e);
               } finally {
                   if (reader != null) {
                       try {
                           reader.close();
                       } catch (java.io.IOException ignored) {
                           // Nothing useful can be done if closing a read-only stream fails.
                       }
                   }
               }
           }   // end method
       } //end inner class: WordCountsThread

       /**
        * Starts one worker thread per file and waits for all of them to finish.
        *
        * @param searchWord the word to look for
        * @param files      paths of the files to scan, one thread each
        */
       public WordCounts (String searchWord, String [] files) {
           int numFiles = files.length;
           Semaphore total = new Semaphore();
           Thread [] threads = new Thread [numFiles];

           for (int i = 0; i < numFiles; i++ ) {
               threads[i] = new Thread (new WordCountsThread (searchWord, files[i], total));
           }

           for (int j = 0; j < numFiles; j++ ) {
               threads[j].start();
           }

           try {
               for (int i = 0; i < numFiles; i++ ) {
                   threads[i].join();
               }
           } catch (InterruptedException e) {
               System.out.println (e);
               Thread.currentThread().interrupt();
           }
       }

       /**
        * Prints the shared total together with the word it was counted for.
        *
        * @param searchWord the word to name in the message
        */
       public void displayTotal (String searchWord) {
           System.out.println ("There are " + wordCount +
                           " total occurrences of " + searchWord);
       }

       /** Times a search for "public" across five fixed file names and prints the total and elapsed milliseconds. */
       public static void main (String [] args) {
           String [] files;
           files = new String [5];
           files [0] = "Player.java";
           files [1] = "KingMove.java";
           files [2] = "NodeDeque.java";
           files [3] = "Position.java";
           files [4] = "DoubleDigest.java";
           String searchWord = "public";
           long time = System.currentTimeMillis (), time_prev = time;
           WordCounts wc = new WordCounts (searchWord, files);
           wc.displayTotal (searchWord);
           time = System.currentTimeMillis ();
           System.out.println ("Diff " + (time - time_prev) + " msecs");
           System.exit(0);
       }
   }