Python – RegEx examples

Searching for text in multiple files within a directory OR one file in a directory

This script searches the yum directory for all the repo files and searches for the URLs each repo goes to.

Search for text in multiple files

import os
import re

# Print the URL configured on every "key=http..." line of the yum repo files.
src_dict = "/etc/yum.repos.d/"  # Base directory holding the repo files
pattern = re.compile(r'http\S+')  # Pattern to search for (raw string keeps \S intact)

for yum_files in os.listdir(src_dict):  # every entry in the directory
    files = os.path.join(src_dict, yum_files)  # full path of the entry
    if not os.path.isfile(files):  # open() would fail on a sub-directory
        continue
    with open(files) as strng:  # the file is closed even if an error occurs
        for lines in strng:
            # Lines without '=' (e.g. comments mentioning a URL) have no value part.
            if '=' in lines and pattern.search(lines):
                # Split on the first '=' only so URLs containing '=' stay whole;
                # strip() drops the trailing newline so print does not double it.
                print(lines.split('=', 1)[1].strip())
                #INSERT WHATEVER CODE YOU WANT

Search for text in one file in a directory

# Print the first match of `pattern` on every line of a single file.
# `pattern` is the compiled regex defined by the preceding example.
file_name = 'file.csv'
drc = '/home/gmastrokostas/PycharmProjects/learning'
with open(os.path.join(drc, file_name)) as f:  # closed automatically
    for lines in f:
        searpatt = re.search(pattern, lines)  # search once and reuse the match
        if searpatt:
            print(searpatt.group())
Change one specific extension type of files in a directory that contains multiple types of extensions

import os
import re

# Rename every *.txt file in src_drct to *.py.
src_drct = '/home/gmastrokostas/tmp'
old_ext = '.txt'
new_ext = '.py'

for files in os.listdir(src_drct):
    if files.endswith(old_ext):
        oldF = os.path.join(src_drct, files)
        # Swap only the trailing extension of the file name; a plain
        # str.replace() on the full path would also rewrite '.txt' inside
        # directory names or in the middle of the file name.
        newF = os.path.join(src_drct, files[:-len(old_ext)] + new_ext)
        os.rename(oldF, newF)

Search for specific types of files and rename the files or part of the files

The script finds files with a specific file extension and renames parts of the files it found.

import os
import re
 
# Replace a date stamp inside the names of the .txt files found in src_drct.
src_drct = '/home/gmastrokostas/tmp'
old_part = '_01012015'
new_part = '_01012014'

for files in os.listdir(src_drct):
    # Only .txt files whose name actually contains the part to replace
    if files.endswith('.txt') and old_part in files:
        oldF = os.path.join(src_drct, files)  # full path of the existing file
        # Replace inside the file name only, never inside the directory path
        newF = os.path.join(src_drct, files.replace(old_part, new_part))
        os.rename(oldF, newF)

Replace a string in multiple files with in a given directory

The script below will search for a string in files within a directory and its subdirectories. Before it starts replacing any text it will create a backup of the original file(s). Note that the script does not take into account possible duplicate file names when copying the files into /tmp.

import re
import os
import shutil

# Replace oldstr with newstr in every file below drc, backing each changed
# file up into `backup` first.
drc = '/root/tmp'
backup = '/tmp'
pattern = re.compile('PYTHON')
oldstr = 'PYTHON'
newstr = 'Python'

for dirpath, dirname, filename in os.walk(drc):  # walk the whole tree
    for fname in filename:
        path = os.path.join(dirpath, fname)  # full path of the file
        with open(path) as src:  # read handle is closed right after reading
            strg = src.read()
        if pattern.search(strg):  # only touch files that contain the pattern
            shutil.copy2(path, backup)  # back up before overwriting
            with open(path, 'w') as f:  # closed (and flushed) even on error
                f.write(strg.replace(oldstr, newstr))

Replace a string in One file with in a given directory

 
# Replace oldstr with newstr inside a single file.
oldstr = 'Time'
newstr = 'TIME'
file_path = '/home/gmastrokostas/PycharmProjects/learning/file.csv'
with open(file_path, 'r') as f:
    strg = f.read().replace(oldstr, newstr)
# Re-open for writing only after the read handle is closed; the `with`
# guarantees the new content is flushed and the handle released.
with open(file_path, 'w') as fopen:
    fopen.write(strg)

Write text from file templ.txt to named.txt (use the ‘a’ option if you want to append instead)

 
#!/usr/bin/python
import os
import re
# Copy templ.txt into named.txt; "wt" truncates named.txt before writing.
# named.txt is opened first, then templ.txt, and both are closed on exit.
with open("named.txt", "wt") as target, open("templ.txt", "rt") as source:
    target.writelines(source)

Search for a string in a file and display the Result

 
import re
# Print every line of the file that contains `text`.
text = "XXX"
file_name = 'templ.txt'
with open(file_name) as handle:  # closed automatically
    for line in handle:
        if text in line:
            # The line already ends with a newline; strip it so print()
            # does not emit a blank line after every match.
            print(line.rstrip('\n'))

Search for a string by using compile in a file and display the Result

 
import re
# Print every line of the file that matches the compiled pattern.
text = "XXX"
pattern = re.compile(text)  # compile once, reuse for every line
file_name = 'templ.txt'
with open(file_name) as of:  # closed automatically
    for line in of:
        if pattern.search(line):
            # Strip the line's own newline so print() does not double it.
            print(line.rstrip('\n'))

Append the contents of a file to another file

import re
import os
# Append the contents of templ.txt to the end of named.txt.
# Both handles are released by the `with` block even if a write fails.
with open("templ.txt", "r") as fo, open("named.txt", "a") as fi:
    for line in fo:
        fi.write(line)

Isolating text field from a variable

text = '11:47:55.045 -T- MFS_TEST10 Received FIX Message 3  8=FIX.4.2|9=146|35=D|34=3|49=MFS_TEST10|52=20120726-15:47:55|56=CCG|115=YYYC|11=AAA 0981/07262012|54=1|38=100000|55=ACL|40=1|47=A|60=20120726-15:47:55|21=1|207=N|10=044|'
# Split on single whitespace characters and keep the third field.
field = re.split(r'\s', text)[2]
print(field)  # call form is valid on both Python 2 and Python 3
Output
MFS_TEST10

Isolating text fields from a file.

 import re
#!/usr/bin/python
import re
import os
import fnmatch
# Print every line of templ.txt followed by its second whitespace-separated field.
with open("templ.txt", "r") as fo:  # closed automatically
    for line in fo:
        print(line.rstrip('\n'))  # avoid doubling the line's own newline
        # The third positional argument of re.split() is maxsplit, not flags,
        # so passing re.I|re.M there only capped the number of splits.
        print(re.split(r'\s', line)[1])
Provided the text within the file was "Road 123 Str NY USA 11214"
Output 123

Searching for specific type files.

 

filenames = ['httpd.conf', 'samba.conf', 'header.h', 'socket.net']

# Keep the names that end with '.h' or start with 'httpd'. The result is bound
# to a name; a bare list comprehension statement would simply be discarded.
matches = [name for name in filenames
           if name.endswith('.h') or name.startswith('httpd')]
for loop in matches:
    print(loop)
Output
httpd.conf
header.h

Searching through a List

 
# Works well on the list of names returned by os.listdir()
addresses = [
    '5412 N CLARK ST',
    '1060 W ADDISON ST',
    '1039 W GRANVILLE AVE',
    '2122 N CLARK ST',
    '4802 N BROADWAY',
]

# Shell-style match: keep the entries that end in " ST" (case-sensitive).
street_matches = [loop for loop in addresses
                  if fnmatch.fnmatchcase(loop, '* ST')]
for loop in street_matches:
    print(loop)
Output
5412 N CLARK ST
1060 W ADDISON ST
2122 N CLARK ST

Print specific fields from a string

 
text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'
# Raw string so that \s, \w, \d and \W reach the regex engine untouched.
pattern = re.compile(r'Today\s\w+\s\d+\W\d+\W\d+', re.IGNORECASE)
searpatt = pattern.search(text)
if searpatt:  # search() returns None when nothing matches
    print(searpatt.group())

Output

Today is 11/27/2012

Replace specific fields of text

#!/usr/bin/python
import re

text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'
newt = "Yesterday"
# Raw string so that \s, \w, \d and \W reach the regex engine untouched.
pattern = re.compile(r'Today\s\w+\s\d+\W\d+\W\d+', re.IGNORECASE)
print(text)  # show the original text (only possible once it is defined)
# Replace the matched "Today is <date>" span with newt.
result = pattern.sub(newt, text)
print(result)

Output

Yesterday. PyCon starts 3/13/2013.

Print a specific element of the split you created

#!/usr/bin/python
import re

text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'
# Raw string so that \s, \w, \d and \W reach the regex engine untouched.
pattern = re.compile(r'Today\s\w+\s\d+\W\d+\W\d+', re.IGNORECASE)
print(text)
# split() cuts the text at the match; element 1 is everything after it.
remainder = pattern.split(text)[1]
print(remainder)

. PyCon starts 3/13/2013.

Replace strings in a file

#!/usr/bin/python
import os
import re
# Un-comment and set listen_addresses and port in postgresql.conf.
text_ip = "#listen_addresses = 'localhost'"
subtext_ip = "listen_addresses = '10.0.0.27'"
text_port = "#port = 5432"
subtext_port = "port = 5432"
conf_path = "/home/postgres/postgres94/postgresql.conf"

with open(conf_path) as f:  # read handle closed before the file is rewritten
    s = f.read()
s = s.replace(text_ip, subtext_ip)
s = s.replace(text_port, subtext_port)
with open(conf_path, 'w') as f:  # flushed and closed even on error
    f.write(s)
Share Button

Python/Postgresql – Query Database and present data in a plot

How to query a database using a Python script and present the data on a plot.

import pandas.io.data
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame
import datetime
import pandas.io.data
import psycopg2
import re
import os


# Fetch the RAM samples of one host, save them to file.csv and plot them.
conn = psycopg2.connect("host='172.31.98.161' dbname='servers' user='seeker'")
try:
    cur = conn.cursor() #Create the cursor
    cur.execute("select total_ram_raw, used_ram_raw, time_captured  from server_perf  where hostname='localhost.localdomain'")
    rows = cur.fetchall()
finally:
    conn.close()  # release the connection even if the query fails

df = pd.DataFrame(rows, columns=['TotalRam','UsedRAM', 'Time'])

# Convert before exporting/plotting; converting after to_csv() and then
# re-reading the CSV would throw the converted columns away.
# NOTE(review): the columns are numeric(30,2) in the schema, presumably
# returned as Decimal by psycopg2 - confirm.
df.TotalRam = df.TotalRam.astype(float)
df.UsedRAM  = df.UsedRAM.astype(float)
df.to_csv("file.csv")

# Index by capture time directly instead of round-tripping through the CSV.
df = df.set_index('Time')
df[['TotalRam', 'UsedRAM']].plot()
plt.show()
Share Button

Python /PostgresqSQL 9.4 – Server Performance Data Capture V.2

In this second version, the scripts which capture data for the servers are using classes. In addition, the script that captures dynamic data from the remote servers captures additional data. Specifically, it captures RAM and HD usage in raw numbers and not only in “humanized” format. The humanized fields were not taken out. As a result the appropriate tables had to be modified, which means the schema has changed as well. Also, the database and the scripts are now being installed via Puppet. The Puppet manifests are far from polished. They need more work but they do work.

These scripts gather static and dynamic information from servers and insert that data into a PostgreSQL database. The static information is information that never changes unless a major upgrade takes place. The dynamic data is performance data of the servers. The purpose of the static data is to be able to query for the dynamic information, which is being inserted into the database every X minutes via cron.

These scripts work only on Linux/Unix based machines.

The PUPPET modules used to install the Database and the scripts are located here

The static information for the remote servers are as follows :

hostname
iface
ipaddress
OS
OSRel
OSKern
total_M
brand
Hz
cores
arch

The dynamic information for the remote servers are as follows :

hostname
iface
ipaddress
total_ram_hum
used_ram_hum
total_ram_raw
used_ram_raw
used_ram_perc
total_HD_hum
used_HD_hum
total_HD_raw
used_HD_raw
used_HD_perc
cpu_use_perc
swap_used_hum
swap_total_hum
swap_perc
swap_used_raw
swap_total_raw

The static.py script will need to be run only one time on the remote servers or when a major upgrade occurs that might change configuration regarding RAM, Partitions, IP, Operating System (even an upgrade), CPU, NIC replacement.

The dynamic.py script will be run on the remote servers via cron. It is the script that captures information which is constantly changing, like memory, storage, and swap usage. Each time cron executes the script, all this data is sent to the remote PostgreSQL database for insertion.

In both scripts data is entered into a dictionary and then a connection to the database is created in order to insert the data.

The Static.py script

#!/usr/bin/python
import psutil
import os
import math
import platform
import subprocess
import socket
import psycopg2
import netifaces as ni
import humanize
from cpuinfo import cpuinfo


class Static():
    """Collect host facts that rarely change: NIC, OS, RAM and CPU details."""

    def __init__(self):
        """Read all static values once and store them as attributes."""
        #NIC INFO
        self.hostname   = socket.gethostname()
        # NOTE(review): index 1 presumably skips the loopback interface;
        # it raises IndexError on hosts with a single interface - confirm.
        self.iface      = ni.interfaces()[1]
        # ni.AF_INET selects the IPv4 addresses (was the magic number 2)
        self.ipaddress  = ni.ifaddresses(self.iface)[ni.AF_INET][0]['addr']
        #---OS INFO
        #For Linux (RH-Debian) Operating Systems
        # NOTE: platform.dist() was removed in Python 3.8; this script
        # targets the Python 2 interpreter named in its shebang.
        dist = platform.dist()
        self.distro  = dist[0]
        self.release = dist[1]
        self.kernel  = platform.release()
        #For Windows Operating Systems
        uname = platform.uname()
        self.osinfo_2_os    = uname[0]
        self.osinfo_2_ver   = uname[2]
        self.osinfo_2_rel   = uname[3]
        #----RAM INFO
        raw_totalM = psutil.virtual_memory().total
        self.total_M    = humanize.naturalsize(raw_totalM)
        #----CPU INFO
        self.info       = cpuinfo.get_cpu_info()
        self.brand      = self.info['brand']
        self.Hz         = self.info['hz_advertised']
        self.cores      = self.info['count']
        self.arch       = self.info['bits']

    def get_OS_make(self):
        """Return a 3-tuple describing the operating system.

        On Linux the tuple is (distro, release, kernel). On Windows and on
        any other platform it is the uname-based (os, version, release)
        triple, so callers that index the result never receive None.
        """
        if platform.system() == "Linux":
            return self.distro, self.release, self.kernel
        return self.osinfo_2_os, self.osinfo_2_ver, self.osinfo_2_rel

info = Static()

hostname  = info.hostname
iface     = info.iface
ipaddress = info.ipaddress
# One call yields the whole (OS, release, kernel) triple.
OS, OSRel, OSKern = info.get_OS_make()
total_M   = info.total_M
brand     = info.brand
Hz        = info.Hz
cores     = info.cores
arch      = info.arch

#Create a Dictionary to pass the value of each function.
server_info = {'hostname': hostname, 'iface':iface, 'ipaddress': ipaddress, 'OS': OS, 'OSRel': OSRel, 'OSKern': OSKern, 'total_M': total_M, 'brand': brand, 'Hz':Hz, 'cores': cores, 'arch': arch}

#Create the Database PostgreSQL 9.4 connection.
conn = psycopg2.connect("host='172.31.98.161' dbname='servers' user='seeker'")
try:
    cur = conn.cursor() #Create the cursor
    # Named placeholders let psycopg2 quote the values itself, so a quote
    # character in e.g. the CPU brand string cannot break the statement.
    cur.execute(
        "INSERT INTO servers(hostname, iface, ipaddress, OS, OSRel, OSKern, total_M, brand, Hz, cores, arch) "
        "VALUES (%(hostname)s, %(iface)s, %(ipaddress)s, %(OS)s, %(OSRel)s, %(OSKern)s, "
        "%(total_M)s, %(brand)s, %(Hz)s, %(cores)s, %(arch)s)",
        server_info)
    #If this is not present the changes will not get commited.
    conn.commit()
finally:
    conn.close()  # always release the connection

The Dynamic.py script

#THIS FILE IS MANAGED BY PUPPET
#!/usr/bin/python
import psutil
import os
import math
import platform
import subprocess
import socket
import psycopg2
import netifaces as ni
import humanize
from cpuinfo import cpuinfo

class Dynamic():
    """Capture a snapshot of frequently changing host metrics (RAM, disk, CPU, swap)."""

    def __init__(self):
        """Sample every metric once and store the values as attributes."""
        #NIC INFO
        self.hostname   = socket.gethostname()
        # NOTE(review): index 1 presumably skips the loopback interface - confirm.
        self.iface      = ni.interfaces()[1]
        # ni.AF_INET selects the IPv4 addresses (was the magic number 2)
        self.ipaddress  = ni.ifaddresses(self.iface)[ni.AF_INET][0]['addr']
        #RAM USAGE INFO-------------------------------------------------------------------
        # One call per resource so the related values come from the same sample.
        ram = psutil.virtual_memory()
        self.total_ram_hum       = humanize.naturalsize(ram.total)
        self.used_ram_hum        = humanize.naturalsize(ram.used)
        #---------Raw info
        self.total_ram_raw       = ram.total
        self.used_ram_raw        = ram.used
        self.used_ram_perc       = ram.percent
        #HD USAGE INFO-------------------------------------------------------------------
        disk = psutil.disk_usage('/')
        self.total_HD_hum        = humanize.naturalsize(disk.total)
        self.used_HD_hum         = humanize.naturalsize(disk.used)
        #---------Raw info
        self.total_HD_raw        = disk.total
        self.used_HD_raw         = disk.used
        self.used_HD_perc        = disk.percent
        #CPU USAGE INFO-------------------------------------------------------------------
        # Without an interval the first cpu_percent() call of a process returns
        # a meaningless 0.0; measure over one second instead (blocks for 1s).
        self.cpu_use_perc        = psutil.cpu_percent(interval=1)
        #SWAP USAGE INFO-------------------------------------------------------------------
        swap = psutil.swap_memory()
        self.swap_used_hum       = humanize.naturalsize(swap.used)
        self.swap_total_hum      = humanize.naturalsize(swap.total)
        self.swap_perc           = swap.percent  # same field as index 3
        #---------Raw info
        self.swap_used_raw       = swap.used
        self.swap_total_raw      = swap.total

    def export_to_csv(self):
        """Print the hostname (no CSV export is implemented here)."""
        print(self.hostname)
info = Dynamic()

# Columns of SERVER_PERF filled by this script; every column name is also
# the name of the attribute on `info` that holds its value.
COLUMNS = (
    'hostname', 'iface', 'ipaddress',
    'total_ram_hum', 'used_ram_hum', 'total_ram_raw', 'used_ram_raw', 'used_ram_perc',
    'total_HD_hum', 'used_HD_hum', 'total_HD_raw', 'used_HD_raw', 'used_HD_perc',
    'cpu_use_perc',
    'swap_used_hum', 'swap_total_hum', 'swap_perc', 'swap_used_raw', 'swap_total_raw',
)

#Create a Dictionary to pass the value of each function.
server_info = dict((name, getattr(info, name)) for name in COLUMNS)

# Only the constant column names are formatted into the statement; the values
# travel as named parameters so psycopg2 quotes them itself.
insert_sql = "INSERT INTO SERVER_PERF(%s) VALUES (%s)" % (
    ", ".join(COLUMNS),
    ", ".join("%%(%s)s" % name for name in COLUMNS),
)

conn = psycopg2.connect("host='172.31.98.161' dbname='servers' user='seeker'")
try:
    cur = conn.cursor() #Create the cursor
    cur.execute(insert_sql, server_info)
    #If this is not present the changes will not get commited.
    conn.commit()
finally:
    conn.close()  # always release the connection

The new Database schema is as follows:

pg_dump -U seeker -d servers -s -h 172.31.98.161 > servers_db_schema

--
CREATE DATABASE SERVERS;
CREATE ROLE seeker WITH PASSWORD 'Password!';
ALTER DATABASE SERVERS OWNER TO seeker;
ALTER ROLE seeker WITH LOGIN;
GRANT ALL PRIVILEGES ON DATABASE SERVERS to seeker;
-- PostgreSQL database dump
--

SET statement_timeout = 0;
SET lock_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SET check_function_bodies = false;
SET client_min_messages = warning;

--
-- Name: plpgsql; Type: EXTENSION; Schema: -; Owner:
--

CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;

--
-- Name: EXTENSION plpgsql; Type: COMMENT; Schema: -; Owner:
--

COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';

SET search_path = public, pg_catalog;

SET default_tablespace = '';

SET default_with_oids = false;

--
-- Name: server_perf; Type: TABLE; Schema: public; Owner: seeker; Tablespace:
--

CREATE TABLE server_perf (
hostname text NOT NULL,
iface text,
ipaddress inet NOT NULL,
total_ram_hum text,
used_ram_hum text,
total_ram_raw numeric(30,2),
used_ram_raw numeric(30,2),
used_ram_perc text,
total_hd_hum text,
used_hd_hum text,
total_hd_raw numeric(30,2),
used_hd_raw numeric(30,2),
used_hd_perc text,
cpu_use_perc text,
swap_used_hum text,
swap_total_hum text,
swap_perc text,
swap_used_raw numeric(30,2),
swap_total_raw numeric(30,2),
time_captured timestamp without time zone DEFAULT now()
);

ALTER TABLE server_perf OWNER TO seeker;

--
-- Name: servers; Type: TABLE; Schema: public; Owner: seeker; Tablespace:
--

CREATE TABLE servers (
hostname text NOT NULL,
iface text,
ipaddress inet NOT NULL,
os text,
osrel text,
oskern text,
total_m text,
brand text,
hz text,
cores numeric(4,1),
arch text
);

ALTER TABLE servers OWNER TO seeker;

--
-- Name: pk_hostname; Type: CONSTRAINT; Schema: public; Owner: seeker; Tablespace:
--

ALTER TABLE ONLY servers
ADD CONSTRAINT pk_hostname PRIMARY KEY (hostname);

--
-- Name: server_perf_hostname_fkey; Type: FK CONSTRAINT; Schema: public; Owner: seeker
--

ALTER TABLE ONLY server_perf
ADD CONSTRAINT server_perf_hostname_fkey FOREIGN KEY (hostname) REFERENCES servers(hostname);

--
-- Name: public; Type: ACL; Schema: -; Owner: postgres
--

REVOKE ALL ON SCHEMA public FROM PUBLIC;
REVOKE ALL ON SCHEMA public FROM postgres;
GRANT ALL ON SCHEMA public TO postgres;
GRANT ALL ON SCHEMA public TO PUBLIC;

--
-- PostgreSQL database dump complete
--
Share Button

Python – Compare modification dates between files and find time difference

The script searches in a specific location for files and it gets in POSIX the time of modification of all files. It also creates a temp file (and write text in it) within the same directory (which gets deleted once the script exits). This temp file is used to get today’s modification date. Then all POSIX dates are converted into human readable format and a comparison is done between the temp file and the files we are examining. If the files we are examining are three months or older, then …. you can enter what ever custom action you want.

#!/usr/bin/python
#The script searches in a specific location for files and it gets in POSIX the time of modification of all files.
#It also creates a temp file (and write text in it)  within the same directory (which gets deleted once the script exits)
#This temp file is used to get today's modification date. Then all POSIX dates are converted into human readable format
#and a comparison is done between the temp file and the files we are examining. If the files we are examining are
#three months or older, then .... you can enter what ever custom action you want.

import os.path
import tempfile
import datetime
import dateutil.relativedelta
import dateutil

#Path of where we are looking for the files
drc = '/root/Music'
#Files whose modification time is this many months old (or older) are deleted
MAX_AGE_MONTHS = 3

#We create a temp file so we can get today's date. File is deleted after script is done executing
tempF = tempfile.NamedTemporaryFile(dir=drc)#, delete=False)
tempF.write(b'something')#We put in some text (bytes: the file is opened in binary mode)
tempF.flush()#Make sure the write reaches the file before we read its mtime
fileT =  os.stat(tempF.name)[8] #We are getting the mtime aka time of modification
fileT_human = datetime.datetime.fromtimestamp(fileT)#We are converting it to human readeable

for dirpath, dirname, filename in os.walk(drc):  #we are going hunting for...
    for fname in filename:                       #...files
        path = os.path.join(dirpath, fname) #full path of files
        if path == tempF.name: #never compare the reference file with itself
            continue
        mtime = os.stat(path)[8] #We are getting time of modification of all the files
        mtime_human  = datetime.datetime.fromtimestamp(mtime) #We humanize the date

        #We are comparing each file with the fileT_human in order to get the difference in date when it comes to modification time
        diff = dateutil.relativedelta.relativedelta(fileT_human, mtime_human)

        #relativedelta keeps months in the 0-11 range and carries the rest into
        #years, so both fields are needed to get the real age in months.
        age_in_months = diff.years * 12 + diff.months
        if age_in_months >= MAX_AGE_MONTHS:
            print("Deleting file  %s" % path)
            #Put whatever action you want here.
            os.remove(path) #it will delete files only
Share Button

Python – Replace a string in multiple files with in a directory

The script below will search for a string in files within a directory and its subdirectories. Before it starts replacing any text it will create a backup of the original file(s).

import re
import os
import shutil

# Replace oldstr with newstr in every file below drc, backing each changed
# file up into `backup` first.
drc = '/root/tmp'
backup = '/tmp'
pattern = re.compile('PYTHON')
oldstr = 'PYTHON'
newstr = 'Python'

for dirpath, dirname, filename in os.walk(drc):  # walk the whole tree
    for fname in filename:
        path = os.path.join(dirpath, fname)  # full path of the file
        with open(path) as src:  # read handle is closed right after reading
            strg = src.read()
        if pattern.search(strg):  # only touch files that contain the pattern
            shutil.copy2(path, backup)  # back up before overwriting
            with open(path, 'w') as f:  # closed (and flushed) even on error
                f.write(strg.replace(oldstr, newstr))

Share Button

Python – Get list of processes, their owners and RAM usage from a Linux system.

This script only works on Linux systems. It lists the running PIDs, the owners of the PIDs and the RAM usage of each PID.

import os, sys
from os.path import join,getsize
import humanize
import pwd
import psutil

# List owner, PID, name and resident memory of every process found in /proc.
pids = [int(pid) for pid in os.listdir('/proc') if pid.isdigit()]
for elements in pids:
    try:
        p = psutil.Process(elements)
        proc_name = p.name()
        uid = os.stat("/proc/%d" % elements).st_uid
        # memory_info().rss replaces the deprecated memory_info_ex()[0]
        rss = p.memory_info().rss
    except (psutil.NoSuchProcess, psutil.AccessDenied, OSError):
        # The process exited (or is off limits) after /proc was listed.
        continue
    try:
        username = pwd.getpwuid(uid)[0]
    except KeyError:
        username = str(uid)  # uid without a passwd entry: show the number
    human_size = humanize.naturalsize(rss, gnu=True)
    print("%s %s %s %s" % (username.ljust(20), elements, proc_name, human_size.rjust(20)))

Output Example

root                 17601 cupsd                 3.4M
gmastrokostas        23460 chrome               117.5M
gmastrokostas        24034 pithos                76.2M
Share Button

Python – Get a listing of all subdirectories and their size on a Linux system

The script asks the user for a directory. It then dives into all the subdirectories within that directory and lists every subdirectory together with its size. It does this by looking for files within each subdirectory and adding up the actual size (not the block size) of all the files within that subdirectory.

#!/usr/bin/python
import humanize
import os, sys
from os.path import join,getsize
import humanize
import pwd

def dir_list(drct=None):
    """Return the full paths of all sub-directories found below a directory.

    drct: directory to scan. When omitted, the user is prompted for it,
    which is how the function behaves when called without arguments.
    Symbolic links to directories are not followed.
    """
    if drct is None:
        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3
        drct = read_line(":Enter directory name. Use full path: ")

    found = []
    for dirpath, dirnames, filenames in os.walk(drct, followlinks=False):
        # Join every directory name with the path it was found in
        found.extend(os.path.join(dirpath, name) for name in dirnames)
    return found  #Return the whole list so dir_size function can process it

def dir_size():
    """Print the total size of the files inside every directory returned by dir_list().

    Sizes are actual file sizes (not block sizes); symbolic links are skipped.
    A directory in which no file was counted is reported with size 0.
    """
    returned_list = dir_list() #The list returned from the dir_list function

    for loop in returned_list: #Every directory is walked on its own
        total = 0
        counted_any = False
        for dirpath, dirnames, filenames in os.walk(loop, followlinks=False):
            for f_name in filenames: #Dive into directories to view files
                path = os.path.join(dirpath, f_name)
                if os.path.islink(path):
                    continue
                total += os.path.getsize(path)  #Size of the actual file
                counted_any = True
        # Format once per directory, and always define the value: previously
        # it stayed unbound when the first directory contained no files.
        human_size = humanize.naturalsize(total, gnu=True) if counted_any else 0
        print("Size of directory  :  %s is %s" % (loop, human_size))

dir_size()

OutPut Example

FUN 1:Enter directory name. Use full path: /home/gmastrokostas
Size of directory  :  /home/gmastrokostas/.mozilla is 16.1M
Size of directory  :  /home/gmastrokostas/.config is 69.9K
Size of directory  :  /home/gmastrokostas/.cache is 53.0M
Size of directory  :  /home/gmastrokostas/Desktop is 0
Size of directory  :  /home/gmastrokostas/Downloads is 332.9M
Size of directory  :  /home/gmastrokostas/Templates is 0
Size of directory  :  /home/gmastrokostas/Public is 0
Size of directory  :  /home/gmastrokostas/Documents is 218.6M
Size of directory  :  /home/gmastrokostas/Music is 0
Size of directory  :  /home/gmastrokostas/Pictures is 0
Size of directory  :  /home/gmastrokostas/Videos is 0
Size of directory  :  /home/gmastrokostas/.local is 1.2M
Share Button

Python – Centos 6 – PostgresSQL9.4 – Check if PostgresSQL is installed. If not, install it and setup a Database

The script checks if PostgreSQL is installed. If it is not, it will do the appropriate checks to see if the PostgreSQL YUM RPM packages are installed and then proceed to install PostgreSQL. It then does the post-configuration steps. It can also create a database with a user assigned to it.

#!/usr/bin/python
# -*- coding: utf-8 -*-
import psutil
import os
import subprocess
import psycopg2


def install_PostgresSQL():
    """Install PostgreSQL 9.4 through yum unless the PGDG repo package is present.

    When the pgdg-centos94 package is already installed, only a message is
    printed. Otherwise the repo package and the server packages are installed
    and post_install_PostgreSQL() is run. Any failure is printed, not raised.
    """
    try:
        # Query rpm once; a non-zero exit code means the package is missing.
        # NOTE(review): this checks the repository package, not the server
        # package itself - confirm that is the intended gate.
        if subprocess.call(["rpm", "-q", "pgdg-centos94"]) == 0:
            print("POSTGRES-SQL IS ALREADY INSTALLED")
            return

        # check_call raises on a non-zero exit code, so a failed yum run is
        # reported below instead of silently continuing to the post-install.
        subprocess.check_call(["yum", "localinstall", "-y", "http://yum.postgresql.org/9.4/redhat/rhel-6-x86_64/pgdg-centos94-9.4-1.noarch.rpm"])
        subprocess.check_call(["yum", "install", "-y", "postgresql94-contrib", "postgresql94-server"])
        print("#################################################################")
        post_install_PostgreSQL()

    except Exception as e:
        print(e)
        print("Install failed. Exiting with Error")


def post_install_PostgreSQL():
    """Write the data/log locations, initialise the cluster, start the service, create the DB.

    Any failure is printed together with "Error in Post Install steps";
    nothing is raised to the caller.
    """
    try:
        path = "/etc/sysconfig/pgsql/"
        fname = 'postgresql-9.4'
        full_path = os.path.join(path, fname)
        with open(full_path, "w") as conf:  # closed and flushed automatically
            # The variable is PGDATA (it was misspelled "GDATA").
            conf.write('PGDATA=/home/postgres94/pgsql/data94\nPGLOG=/home/postgres94/pgsql/pgstartup94.log')
        # check_call raises on a non-zero exit code so a failed step stops
        # the sequence and is reported by the handler below.
        subprocess.check_call(["/usr/pgsql-9.4/bin/postgresql94-setup", "initdb"])
        subprocess.check_call(["systemctl", "enable", "postgresql-9.4"])
        subprocess.check_call(["systemctl", "start", "postgresql-9.4"])
        create_Database()

    except Exception as e:
        print(e)
        print("Error in Post Install steps")



def create_Database():
    """Create the "test" database and the "george" role, and grant the role access.

    Errors (connection failure, objects that already exist, ...) are printed,
    not raised. The connection is always closed.
    """
    conn = None
    try:
        conn = psycopg2.connect("host='10.0.0.41' dbname='postgres' user='postgres'")
        # Autocommit: every statement takes effect immediately, so no
        # explicit commit is needed afterwards.
        conn.autocommit = True
        cur = conn.cursor() #Create the cursor
        cur.execute("""CREATE DATABASE test""")
        cur.execute("""CREATE ROLE george WITH PASSWORD 'YourPasswordHere'""")
        cur.execute("""GRANT ALL PRIVILEGES ON DATABASE test to george""")
    except Exception as e:
        print(e)
    finally:
        if conn is not None:
            conn.close()



# The installer defined in this script is install_PostgresSQL(); no function
# named check_PostgresSQL exists, so that call failed with a NameError.
install_PostgresSQL()
create_Database()
Share Button

Python – Search for specific types of files and rename the files or part of the files

The script finds files with a specific file extension and renames parts of the files it found.

import os
import re
 
# Replace a date stamp inside the names of the .txt files found in src_drct.
src_drct = '/home/gmastrokostas/tmp'
old_part = '_01012015'
new_part = '_01012014'

for files in os.listdir(src_drct):
    # Only .txt files whose name actually contains the part to replace
    if files.endswith('.txt') and old_part in files:
        oldF = os.path.join(src_drct, files)  # full path of the existing file
        # Replace inside the file name only, never inside the directory path
        newF = os.path.join(src_drct, files.replace(old_part, new_part))
        os.rename(oldF, newF)
Share Button

Python – Change one specific extension type of files in a directory that contains multiple types of extensions

import os
import re

# Rename every *.txt file in src_drct to *.py.
src_drct = '/home/gmastrokostas/tmp'
old_ext = '.txt'
new_ext = '.py'

for files in os.listdir(src_drct):
    if files.endswith(old_ext):
        oldF = os.path.join(src_drct, files)
        # Swap only the trailing extension of the file name; a plain
        # str.replace() on the full path would also rewrite '.txt' inside
        # directory names or in the middle of the file name.
        newF = os.path.join(src_drct, files[:-len(old_ext)] + new_ext)
        os.rename(oldF, newF)

Share Button