Improving Performance of a Tape System:Progress

From CSclasswiki
Jump to: navigation, search


( Back to Main Page )
( Back to Programs and Graphs Page )

Week of 2/27/17 - 3/6/17


  • Generated a simple LRU simulator in Python.


# LRU simulator.py
# a skeleton program for simulating the LRU algorithm.

drive = [-1]*4
tapes = [1,2,0,100,200,0,2,0,1,300,400]
miss = 0
hit = 0
for tape in tapes:
    #look if tapeID is in drive
    #if not,remove the LRU tapeID and put the new tapeID at the front
    #if in the drive, move the tapeID to the front
    if tape not in drive:
        drive = [tape] + drive[0:-1]
        miss += 1
    else:
        index = drive.index(tape)
        drive = [tape] +drive[0:index]+drive[index+1:]
        hit += 1
    print(tape,drive)
print(miss,miss+hit)


Week of 3/6/17 - 3/20/17


  • Generated test version of LRU simulator in Python.


#test.py
#compute miss ratio using LRU algorithm
from __future__ import print_function
from __future__ import division

import math
import sys
import datetime


def main():

    if len( sys.argv ) != 3:
        print( "Syntax: python testCommandLineArgs.py noDrives traceFileName" )
        return

    noDrives = int( sys.argv[1] )
    fileName = sys.argv[2] 

    print( "No drives:  ", noDrives )
    print( "trace file: ", fileName )

    # initialize the drive
    drive = [-1]*noDrives
    miss = 0
    hit = 0

    # take in tape requests and clean up the requests
    file = open( fileName, 'r' )
    lines = file.readlines()
    file.close()

    #hdre01#VLAD#20140501:005936#  DISMOUNT RC1021 Home 1,14,5,6,1 Drive 1,14,1,15 Client Host Id 328.681.634.674

    tapesD = {}
    for line in lines:
        # clean up the file
        line = line.replace( "  ", " " )
        line = line.replace( "  ", " " )
        line = line.replace( "  ", " " )
        line = line.replace( "  ", " " )
        fields = line.split(" ")
        md = fields[1]
        tapeId = fields[2]

        if md == "DISMOUNT":
            pass
        elif tapeId not in drive:
            drive = [tapeId] + drive[0:-1]
            miss += 1
        else:
            index = drive.index(tapeId)
            drive = [tapeId] +drive[0:index]+drive[index+1:]
            hit += 1

    total_request = hit + miss
    raw_percent = 100*(hit/total_request)
    # miss ratio,precision to 2 decimal places
    percent = round(raw_percent,2)
    # date time
    date = datetime.datetime.now().strftime("%Y%m%d %H:%M")
    f = open("results.txt",'a')
    f.write('\n')
    #append result
    f.write(','.join((date,fileName,str(noDrives),str(total_request),str(percent))))

main()


  • Generated test version of Belady simulator in Python.


# Belady simluator.py
drive = [-1]*4
tapes = ['a','b','c','b','a','d','e','f','a','c','b','d','f','g','h','j','k','l','a','b','f','a','d']
miss = 0
hit = 0
# to record tapes' index in dictionary keys
tapeIndex = 0
# to remove the tapes' index that has been inserted/removed
currentIndex = 0
# to find out which tape will not be called in near future
farthestIndex = 0
# to record the index of this tape
farthestTapeIndex = 0

tapesD = {}
isFound = False
# record all tapes’index in dictionary
for tape in tapes:
    if tape not in tapesD.keys():
        tapesD[ tape ] = [tapeIndex]
       
    else:
        tapesD[ tape ].append(tapeIndex)

    tapeIndex += 1

print(tapesD)


for tape in tapes:
    # if tape is not in drive and drive has empty space
    if tape not in drive and drive[3]== -1:
        drive = [tape] + drive[0:-1]
        miss += 1

    # if tape is not in drive and drive is full
    elif tape not in drive:
        # check if any tape will never be needed again
        # if found, replace it with the new tape
        for k in range(0,len(drive)):
            if len(tapesD[drive[k]])==0:
                drive[k] = tape
                miss +=1
                isFound = True
                break

        # if all tapes will be needed again
        # find out which tape will be called relatively late
        if isFound == False:        
            print("pre-Belady",drive)
            for k in range(0,len(drive)):
                if farthestIndex < tapesD[drive[k]][0]:
                    farthestIndex = tapesD[drive[k]][0]
                    # record the index of the tape in drive,helping the future replacement
                    farthestTapeIndex = k
            drive[farthestTapeIndex] = tape
            miss +=1
            print("after-Belady ",drive)
            print("why remove",tapesD)

    # if tape requested is in drive
    else:      
        index = drive.index(tape)
        drive = [tape] +drive[0:index]+drive[index+1:]
        hit += 1
    # remove the tape index that has already been inserted
    tapesD[ tape ].remove(currentIndex)
    # update which tape is currently dealt with
    currentIndex +=1
    isFound = False


Week of 3/20/17 - 3/27/17


  • Generated documented version of LRU simulator in Python.


# LRU_simulator.py
# Nancy Xiao
# this program uses traces of tape requests at the European Centre for Medium-Range Weather Forecasts 
# (data can be found in the paper "Analysis of the ECMWF Storage Landscape" by Grawinkel et al)

# the program simulates the tape system by implementing Least Recently Used algorithm
# that is to say, whenever the tape drive is full and the system needs to insert a new tape
# the system will replace the least recently used tape with the new tape

# the line below shows a typical tape request
# hdre01#VLAD#20140501:005936# MOUNT RC1021 Home 1,14,5,6,1 Drive 1,14,1,15 Client Host Id 328.681.634.674 
# time of the request: hdre01#VLAD#20140501:005936#
# to mount or dismount a tape: MOUNT
# tape ID: RC1021
# where the tape is fetched: Home 1,14,5,6,1
# insert to which drive: Drive 1,14,1,15
# IP address of the requester: Client Host Id 328.681.634.674

# to use the program, enter the size of drives and the name of the trace file in command line
# when the program finishes, it will print out a report for the trace file
# the report format will be: date, file name, drive size, number of total requests,miss ratio

from __future__ import print_function
from __future__ import division

import math
import sys
import datetime
DEBUG = False

def main():

    if len( sys.argv ) != 3:
        print( "Syntax: python testCommandLineArgs.py noDrives traceFileName" )
        return

    # get command line parameters
    noDrives = int( sys.argv[1] )
    fileName = sys.argv[2] 

    print( "No drives:  ", noDrives )
    print( "trace file: ", fileName )

    # initialize the drives
    drive = [-1]*noDrives
    miss = 0
    hit = 0

    # take in tape requests and clean up the requests
    file = open( fileName, 'r' )
    lines = file.readlines()
    file.close()

    
    tapesD = {}
    for line in lines:
        # clean up the file
        line = line.replace( "  ", " " )
        line = line.replace( "  ", " " )
        line = line.replace( "  ", " " )
        fields = line.split(" ")
        md = fields[1]
        tapeId = fields[2]
        if DEBUG:
            print(md,tapeId)

        if md == "DISMOUNT":
            pass
        elif tapeId not in drive:
            drive = [tapeId] + drive[0:-1]
            miss += 1
        else:
            # implements LRU
            index = drive.index(tapeId)
            drive = [tapeId] +drive[0:index]+drive[index+1:]
            hit += 1

        if DEBUG:
            print(drive)

    total_request = hit + miss
    raw_percent = 100*(miss/total_request)

    # miss ratio,precision to 2 decimal places
    percent = round(raw_percent,2)

    # current date time
    date = datetime.datetime.now().strftime("%Y%m%d %H:%M")
    f = open("results.txt",'a')
    f.write('\n')

    #append result
    result = (', '.join((date,fileName,str(noDrives),str(total_request),str(hit),str(miss),str(percent))))
    f.write(result)
    f.close()
    print(result)
main()


  • Generated documented version of Belady simulator in Python.


# Belady_simulator.py
# Nancy Xiao
# this program uses traces of tape requests at the European Centre for Medium-Range Weather Forecasts 
# (data can be found in the paper "Analysis of the ECMWF Storage Landscape" by Grawinkel et al)

# the program simulates the tape system by implementing Belady algorithm
# that is to say, whenever the tape drive is full and the system needs to insert a new tape
# the system will look for tape that will never be used in the future or will not be used in the near future

# the line below shows a typical tape request
# hdre01#VLAD#20140501:005936# MOUNT RC1021 Home 1,14,5,6,1 Drive 1,14,1,15 Client Host Id 328.681.634.674 
# time of the request: hdre01#VLAD#20140501:005936#
# to mount or dismount a tape: MOUNT
# tape ID: RC1021
# where the tape is fetched: Home 1,14,5,6,1
# insert to which drive: Drive 1,14,1,15
# IP address of the requester: Client Host Id 328.681.634.674

from __future__ import print_function
from __future__ import division

import math
import sys
import datetime


drive = []
miss = 0
hit = 0
DEBUG = False

def displayDict( tapesD ):
    keys = list( tapesD.keys() )
    keys.sort()
    if DEBUG:
        print( "Dictionary" )
        print( "--------------------------------------------" )
        for key in keys:
            print( "%2s: %s" % (key, ", ".join( [str(k) for k in tapesD[key]] ) ) )

        print( "--------------------------------------------" )

def main():
    global drive, tapes, miss, hit
    global DEBUG

    if len( sys.argv ) != 3:
        print( "Syntax: python testCommandLineArgs.py noDrives traceFileName" )
        return

    # get command line parameters
    noDrives = int( sys.argv[1] )
    fileName = sys.argv[2] 

    print( "No drives:  ", noDrives )
    print( "trace file: ", fileName )

    # initialize the drives
    drive = [-1]*noDrives

    # take in tape requests and clean up the requests
    file = open( fileName, 'r' )
    lines = file.readlines()
    file.close()
    
    # to record tapes' index in dictionary keys
    tapeIndex = 0
    # to remove the tapes' index that has been inserted/removed
    currentIndex = 0
    # to find out which tape will not be called in near future
    farthestIndex = 0
    # to record the index of this tape
    farthestTapeIndex = 0

     
    tapesD = {}
    isFound = False

    for line in lines:
        # clean up the file
        line = line.replace( "  ", " " )
        line = line.replace( "  ", " " )
        line = line.replace( "  ", " " )
        fields = line.split(" ")
        md = fields[1]
        tape = fields[2]      
        if md != "DISMOUNT":
            # record all tapes'index in dictionary
            if tape not in tapesD.keys():
                tapesD[ tape ] = [tapeIndex]
            else:
                tapesD[ tape ].append(tapeIndex)
            tapeIndex += 1
            
    displayDict(tapesD)  
    

    for line in lines :
        line = line.replace( "  ", " " )
        line = line.replace( "  ", " " )
        line = line.replace( "  ", " " )
        fields = line.split(" ")
        md = fields[1]
        tape = fields[2]

        if md != "DISMOUNT":
            # if tape requested is in drive
            if tape in drive:
                hit += 1
                index = drive.index(tape)
                drive = [tape] +drive[0:index]+drive[index+1:] 
                if DEBUG: print( "  %s --> Hit" % str(drive) )

            # if tape is not in drive and drive has empty space  
            elif tape not in drive and drive[noDrives-1]== -1:
                miss += 1
                drive = [tape] + drive[0:-1]
                if DEBUG: print( "  %s --> Miss" % str( drive ) )
                    
            # if tape is not in drive and drive is full
            elif tape not in drive:
                miss += 1
                # check if any tape will never be needed again
                # if found, replace it with the new tape
                for k in range(0,len(drive)):
                    if len(tapesD[drive[k]])==0:
                        drive[k] = tape
                        isFound = True
                        if DEBUG: print( "  %s --> Miss (isFound:T)" % str( drive ) )
                        break
             
                # if all tapes will be needed again
                # find out which tape will be called relatively late
                if isFound == False:        
                    for k in range(0,len(drive)):
                        if farthestIndex < tapesD[drive[k]][0]:
                            farthestIndex = tapesD[drive[k]][0]
                            # record the index of the tape in drive,helping the future replacement
                            farthestTapeIndex = k
                            if DEBUG: print( "  farthestTapeIndex =", k )

                    drive[farthestTapeIndex] = tape
                    if DEBUG: print( "  Updated drive =", str( drive ) )
                 

                
            # remove the tape index that has already been inserted
            tapesD[ tape ].remove(currentIndex)
            # update which tape is currently dealt with
            currentIndex +=1
            # set the boolean to its original state
            isFound = False

            if DEBUG: print( "===> current index %d: current tape: %2s " % (currentIndex, tape) )
            if DEBUG: displayDict( tapesD )

    total_request = hit + miss
    raw_percent = 100*(miss/total_request)

    # miss ratio,precision to 2 decimal places
    percent = round(raw_percent,2)
    
    # current date time
    date = datetime.datetime.now().strftime("%Y%m%d %H:%M")
    f = open("results.txt",'a')
    f.write('\n')

    #append result
    result = (', '.join((date,fileName,str(noDrives),str(total_request),str(hit),str(miss),str(percent))))
    result = "Belady " + result
    f.write(result)
    f.close()
    print(result)
     
main()


  • Generated a graph showing miss ratio of LRU for 5 traces.
MissRatioGraphFor5Traces.png