Improving Performance of a Tape System:Progress
( Back to Main Page )
( Back to Programs and Graphs Page )
Week of 2/27/17 - 3/6/17
- Generated a simple LRU simulator in Python.
# LRU simulator.py
# a skeleton program for simulating the LRU algorithm.
drive = [-1]*4
tapes = [1,2,0,100,200,0,2,0,1,300,400]
miss = 0
hit = 0
for tape in tapes:
#look if tapeID is in drive
#if not,remove the LRU tapeID and put the new tapeID at the front
#if in the drive, move the tapeID to the front
if tape not in drive:
drive = [tape] + drive[0:-1]
miss += 1
else:
index = drive.index(tape)
drive = [tape] +drive[0:index]+drive[index+1:]
hit += 1
print(tape,drive)
print(miss,miss+hit)
Week of 3/6/17 - 3/20/17
- Generated test version of LRU simulator in Python.
#test.py
#compute miss ratio using LRU algorithm
from __future__ import print_function
from __future__ import division
import math
import sys
import datetime
def main():
if len( sys.argv ) != 3:
print( "Syntax: python testCommandLineArgs.py noDrives traceFileName" )
return
noDrives = int( sys.argv[1] )
fileName = sys.argv[2]
print( "No drives: ", noDrives )
print( "trace file: ", fileName )
# initialize the drive
drive = [-1]*noDrives
miss = 0
hit = 0
# take in tape requests and clean up the requests
file = open( fileName, 'r' )
lines = file.readlines()
file.close()
#hdre01#VLAD#20140501:005936# DISMOUNT RC1021 Home 1,14,5,6,1 Drive 1,14,1,15 Client Host Id 328.681.634.674
tapesD = {}
for line in lines:
# clean up the file
line = line.replace( " ", " " )
line = line.replace( " ", " " )
line = line.replace( " ", " " )
line = line.replace( " ", " " )
fields = line.split(" ")
md = fields[1]
tapeId = fields[2]
if md == "DISMOUNT":
pass
elif tapeId not in drive:
drive = [tapeId] + drive[0:-1]
miss += 1
else:
index = drive.index(tapeId)
drive = [tapeId] +drive[0:index]+drive[index+1:]
hit += 1
total_request = hit + miss
raw_percent = 100*(hit/total_request)
# miss ratio,precision to 2 decimal places
percent = round(raw_percent,2)
# date time
date = datetime.datetime.now().strftime("%Y%m%d %H:%M")
f = open("results.txt",'a')
f.write('\n')
#append result
f.write(','.join((date,fileName,str(noDrives),str(total_request),str(percent))))
main()
- Generated test version of Belady simulator in Python.
# Belady simluator.py
drive = [-1]*4
tapes = ['a','b','c','b','a','d','e','f','a','c','b','d','f','g','h','j','k','l','a','b','f','a','d']
miss = 0
hit = 0
# to record tapes' index in dictionary keys
tapeIndex = 0
# to remove the tapes' index that has been inserted/removed
currentIndex = 0
# to find out which tape will not be called in near future
farthestIndex = 0
# to record the index of this tape
farthestTapeIndex = 0
tapesD = {}
isFound = False
# record all tapes’index in dictionary
for tape in tapes:
if tape not in tapesD.keys():
tapesD[ tape ] = [tapeIndex]
else:
tapesD[ tape ].append(tapeIndex)
tapeIndex += 1
print(tapesD)
for tape in tapes:
# if tape is not in drive and drive has empty space
if tape not in drive and drive[3]== -1:
drive = [tape] + drive[0:-1]
miss += 1
# if tape is not in drive and drive is full
elif tape not in drive:
# check if any tape will never be needed again
# if found, replace it with the new tape
for k in range(0,len(drive)):
if len(tapesD[drive[k]])==0:
drive[k] = tape
miss +=1
isFound = True
break
# if all tapes will be needed again
# find out which tape will be called relatively late
if isFound == False:
print("pre-Belady",drive)
for k in range(0,len(drive)):
if farthestIndex < tapesD[drive[k]][0]:
farthestIndex = tapesD[drive[k]][0]
# record the index of the tape in drive,helping the future replacement
farthestTapeIndex = k
drive[farthestTapeIndex] = tape
miss +=1
print("after-Belady ",drive)
print("why remove",tapesD)
# if tape requested is in drive
else:
index = drive.index(tape)
drive = [tape] +drive[0:index]+drive[index+1:]
hit += 1
# remove the tape index that has already been inserted
tapesD[ tape ].remove(currentIndex)
# update which tape is currently dealt with
currentIndex +=1
isFound = False
Week of 3/20/17 - 3/27/17
- Generated documented version of LRU simulator in Python.
# LRU_simulator.py
# Nancy Xiao
# this program uses traces of tape requests at the European Centre for Medium-Range Weather Forecasts
# (data can be found in the paper "Analysis of the ECMWF Storage Landscape" by Grawinkel et al)
# the program simulates the tape system by implementing Least Recently Used algorithm
# that is to say, whenever the tape drive is full and the system needs to insert a new tape
# the system will replace the least recently used tape with the new tape
# the line below shows a typical tape request
# hdre01#VLAD#20140501:005936# MOUNT RC1021 Home 1,14,5,6,1 Drive 1,14,1,15 Client Host Id 328.681.634.674
# time of the request: hdre01#VLAD#20140501:005936#
# to mount or dismount a tape: MOUNT
# tape ID: RC1021
# where the tape is fetched: Home 1,14,5,6,1
# insert to which drive: Drive 1,14,1,15
# IP address of the requester: Client Host Id 328.681.634.674
# to use the program, enter the size of drives and the name of the trace file in command line
# when the program finishes, it will print out a report for the trace file
# the report format will be: date, file name, drive size, number of total requests,miss ratio
from __future__ import print_function
from __future__ import division
import math
import sys
import datetime
DEBUG = False
def main():
if len( sys.argv ) != 3:
print( "Syntax: python testCommandLineArgs.py noDrives traceFileName" )
return
# get command line parameters
noDrives = int( sys.argv[1] )
fileName = sys.argv[2]
print( "No drives: ", noDrives )
print( "trace file: ", fileName )
# initialize the drives
drive = [-1]*noDrives
miss = 0
hit = 0
# take in tape requests and clean up the requests
file = open( fileName, 'r' )
lines = file.readlines()
file.close()
tapesD = {}
for line in lines:
# clean up the file
line = line.replace( " ", " " )
line = line.replace( " ", " " )
line = line.replace( " ", " " )
fields = line.split(" ")
md = fields[1]
tapeId = fields[2]
if DEBUG:
print(md,tapeId)
if md == "DISMOUNT":
pass
elif tapeId not in drive:
drive = [tapeId] + drive[0:-1]
miss += 1
else:
# implements LRU
index = drive.index(tapeId)
drive = [tapeId] +drive[0:index]+drive[index+1:]
hit += 1
if DEBUG:
print(drive)
total_request = hit + miss
raw_percent = 100*(miss/total_request)
# miss ratio,precision to 2 decimal places
percent = round(raw_percent,2)
# current date time
date = datetime.datetime.now().strftime("%Y%m%d %H:%M")
f = open("results.txt",'a')
f.write('\n')
#append result
result = (', '.join((date,fileName,str(noDrives),str(total_request),str(hit),str(miss),str(percent))))
f.write(result)
f.close()
print(result)
main()
- Generated documented version of Belady simulator in Python.
# Belady_simulator.py
# Nancy Xiao
# this program uses traces of tape requests at the European Centre for Medium-Range Weather Forecasts
# (data can be found in the paper "Analysis of the ECMWF Storage Landscape" by Grawinkel et al)
# the program simulates the tape system by implementing Belady algorithm
# that is to say, whenever the tape drive is full and the system needs to insert a new tape
# the system will look for tape that will never be used in the future or will not be used in the near future
# the line below shows a typical tape request
# hdre01#VLAD#20140501:005936# MOUNT RC1021 Home 1,14,5,6,1 Drive 1,14,1,15 Client Host Id 328.681.634.674
# time of the request: hdre01#VLAD#20140501:005936#
# to mount or dismount a tape: MOUNT
# tape ID: RC1021
# where the tape is fetched: Home 1,14,5,6,1
# insert to which drive: Drive 1,14,1,15
# IP address of the requester: Client Host Id 328.681.634.674
from __future__ import print_function
from __future__ import division
import math
import sys
import datetime
drive = []
miss = 0
hit = 0
DEBUG = False
def displayDict( tapesD ):
keys = list( tapesD.keys() )
keys.sort()
if DEBUG:
print( "Dictionary" )
print( "--------------------------------------------" )
for key in keys:
print( "%2s: %s" % (key, ", ".join( [str(k) for k in tapesD[key]] ) ) )
print( "--------------------------------------------" )
def main():
global drive, tapes, miss, hit
global DEBUG
if len( sys.argv ) != 3:
print( "Syntax: python testCommandLineArgs.py noDrives traceFileName" )
return
# get command line parameters
noDrives = int( sys.argv[1] )
fileName = sys.argv[2]
print( "No drives: ", noDrives )
print( "trace file: ", fileName )
# initialize the drives
drive = [-1]*noDrives
# take in tape requests and clean up the requests
file = open( fileName, 'r' )
lines = file.readlines()
file.close()
# to record tapes' index in dictionary keys
tapeIndex = 0
# to remove the tapes' index that has been inserted/removed
currentIndex = 0
# to find out which tape will not be called in near future
farthestIndex = 0
# to record the index of this tape
farthestTapeIndex = 0
tapesD = {}
isFound = False
for line in lines:
# clean up the file
line = line.replace( " ", " " )
line = line.replace( " ", " " )
line = line.replace( " ", " " )
fields = line.split(" ")
md = fields[1]
tape = fields[2]
if md != "DISMOUNT":
# record all tapes'index in dictionary
if tape not in tapesD.keys():
tapesD[ tape ] = [tapeIndex]
else:
tapesD[ tape ].append(tapeIndex)
tapeIndex += 1
displayDict(tapesD)
for line in lines :
line = line.replace( " ", " " )
line = line.replace( " ", " " )
line = line.replace( " ", " " )
fields = line.split(" ")
md = fields[1]
tape = fields[2]
if md != "DISMOUNT":
# if tape requested is in drive
if tape in drive:
hit += 1
index = drive.index(tape)
drive = [tape] +drive[0:index]+drive[index+1:]
if DEBUG: print( " %s --> Hit" % str(drive) )
# if tape is not in drive and drive has empty space
elif tape not in drive and drive[noDrives-1]== -1:
miss += 1
drive = [tape] + drive[0:-1]
if DEBUG: print( " %s --> Miss" % str( drive ) )
# if tape is not in drive and drive is full
elif tape not in drive:
miss += 1
# check if any tape will never be needed again
# if found, replace it with the new tape
for k in range(0,len(drive)):
if len(tapesD[drive[k]])==0:
drive[k] = tape
isFound = True
if DEBUG: print( " %s --> Miss (isFound:T)" % str( drive ) )
break
# if all tapes will be needed again
# find out which tape will be called relatively late
if isFound == False:
for k in range(0,len(drive)):
if farthestIndex < tapesD[drive[k]][0]:
farthestIndex = tapesD[drive[k]][0]
# record the index of the tape in drive,helping the future replacement
farthestTapeIndex = k
if DEBUG: print( " farthestTapeIndex =", k )
drive[farthestTapeIndex] = tape
if DEBUG: print( " Updated drive =", str( drive ) )
# remove the tape index that has already been inserted
tapesD[ tape ].remove(currentIndex)
# update which tape is currently dealt with
currentIndex +=1
# set the boolean to its original state
isFound = False
if DEBUG: print( "===> current index %d: current tape: %2s " % (currentIndex, tape) )
if DEBUG: displayDict( tapesD )
total_request = hit + miss
raw_percent = 100*(miss/total_request)
# miss ratio,precision to 2 decimal places
percent = round(raw_percent,2)
# current date time
date = datetime.datetime.now().strftime("%Y%m%d %H:%M")
f = open("results.txt",'a')
f.write('\n')
#append result
result = (', '.join((date,fileName,str(noDrives),str(total_request),str(hit),str(miss),str(percent))))
result = "Belady " + result
f.write(result)
f.close()
print(result)
main()
- Generated a graph showing miss ratio of LRU for 5 traces.