Querying iRODS

Adding specific queries from the command line

bash escaping can cause havok when loading in a SQl query, so use this approach to work around that.

# create file e.g.
# vim lastWeekZeroLengthUnintentional.sql
# put that into an environment variable
QUERY=$(cat lastWeekZeroLengthUnintentional.sql )
# use the env varaible as a source for the query
iadmin asq "${QUERY}" lastWeekZeroLengthUnintentional
# run the query
iquest --no-page --sql lastWeekZeroLengthUnintentional

Sending query output by email

iquest --no-page --sql lastWeekFilesSingleReplicaNotnoReplRoot | mail -s "Single Replica Files Created over the last week" -r "iRODS Bot irods@domain>" recipient@domain

Specific queries to monitor catalog state

Count of zero Length files where the checksum is not that of an empty file

select count(*) from r_data_main d where d.data_size = '0'
and to_timestamp(cast(d.create_ts as bigint)) > (NOW() - INTERVAL '7 DAY')
and d.data_id not in (select object_id from r_objt_metamap a, r_meta_main b
                        where a.meta_id = b.meta_id
                        and b.meta_attr_name = 'md5'
                        and b.meta_attr_value = 'd41d8cd98f00b204e9800998ecf8427e')
and d.resc_id not in (with recursive cte as (
                                                select r.resc_id, cast(r.resc_name as text) as resc_name, cast((case when r.resc_parent = '' then null else r.resc_parent end) as integer), 1 as level
                                                 from r_resc_main r
                                                union all
                                                select e.resc_id, c.resc_name || ';' || cast(e.resc_name as text) as resc_name,  cast((case when e.resc_parent = '' then null else e.resc_parent end) as integer), c.level + 1
                                                from cte c
                                                join r_resc_main e on cast((case when e.resc_parent = '' then null else e.resc_parent end) as integer) = c.resc_id
                                             )
                        select resc_id from cte z
                        where resc_name like 'noReplRoot%')

Count of zero Length files created in the last week where the checksum is not that of an empty file

select count(*) from r_data_main d where data_size = '0'
and d.data_id not in (select object_id from r_objt_metamap a, r_meta_main b 
                        where a.meta_id = b.meta_id
                        and b.meta_attr_name = 'md5'  
                        and b.meta_attr_value = 'd41d8cd98f00b204e9800998ecf8427e')
and d.resc_id not in (with recursive cte as (
                                                select r.resc_id, cast(r.resc_name as text) as resc_name, cast((case when r.resc_parent = '' then null else r.resc_parent end) as integer), 1 as level
                                                 from r_resc_main r 
                                                union all
                                                select e.resc_id, c.resc_name || ';' || cast(e.resc_name as text) as resc_name,  cast((case when e.resc_parent = '' then null else e.resc_parent end) as integer), c.level + 1
                                                from cte c
                                                join r_resc_main e on cast((case when e.resc_parent = '' then null else e.resc_parent end) as integer) = c.resc_id
                                             )
                        select resc_id from cte z
                        where resc_name like 'noReplRoot%')

List objects with a single replica where they are not in a tree that only has single replicas

A bit confusing this one, but impossible to do with GenQuery. The idea behind this query is that there are two trees in a Zone, one called 'root' and one called 'noReplRoot'. As you might surmise, the 'root' tree contains a 'replicate' composite tree hierachy, so there should not be any files with a single replica unless they are in the noReplResc tree. This query lists objects violating those conditions created in the past week;

select a.data_id, b.coll_name, a.data_name, to_timestamp(cast(a.create_ts as bigint)) as create_date  from r_data_main a, r_coll_main b
where a.coll_id = b.coll_id
and to_timestamp(cast(a.create_ts as bigint)) > (NOW() - INTERVAL '7 DAY')
and a.data_id in 
(select d.data_id from r_data_main d where resc_id not in (
        with recursive cte as (
                select r.resc_id, cast(r.resc_name as text) as resc_name, cast((case when r.resc_parent = '' then null else r.resc_parent end) as integer), 1 as level
                from r_resc_main r 
                union all
                select e.resc_id, c.resc_name || ';' || cast(e.resc_name as text) as resc_name,  cast((case when e.resc_parent = '' then null else e.resc_parent end) as integer), c.level + 1
                from cte c
                join r_resc_main e on cast((case when e.resc_parent = '' then null else e.resc_parent end) as integer) = c.resc_id)
        select resc_id from cte z
        where resc_name like 'noReplRoot%')
group by d.data_id having count(*) = 1)

As above but with no time limit

select a.data_id, b.coll_name, a.data_name, to_timestamp(cast(a.create_ts as bigint)) as create_date  from r_data_main a, r_coll_main b
where a.coll_id = b.coll_id
and a.data_id in 
(select d.data_id from r_data_main d where resc_id not in (
        with recursive cte as (
                select r.resc_id, cast(r.resc_name as text) as resc_name, cast((case when r.resc_parent = '' then null else r.resc_parent end) as integer), 1 as level
                from r_resc_main r 
                union all
                select e.resc_id, c.resc_name || ';' || cast(e.resc_name as text) as resc_name,  cast((case when e.resc_parent = '' then null else e.resc_parent end) as integer), c.level + 1
                from cte c
                join r_resc_main e on cast((case when e.resc_parent = '' then null else e.resc_parent end) as integer) = c.resc_id)
        select resc_id from cte z
        where resc_name like 'noReplRoot%')
group by d.data_id having count(*) = 1)
order by a.create_ts desc

As above but no time limit and just return a count (for monitoring)

select count(*) from  (
        select d.data_id from r_data_main d where resc_id not in (
                with recursive cte as (
                select r.resc_id, cast(r.resc_name as text) as resc_name, cast((case when r.resc_parent = '' then null else r.resc_parent end) as integer), 1 as level
                 from r_resc_main r 
                union all
                select e.resc_id, c.resc_name || ';' || cast(e.resc_name as text) as resc_name,  cast((case when e.resc_parent = '' then null else e.resc_parent end) as integer), c.level + 1
                from cte c
                join r_resc_main e on cast((case when e.resc_parent = '' then null else e.resc_parent end) as integer) = c.resc_id
                )
                select resc_id from cte z
                where resc_name like 'noReplRoot%')
group by d.data_id having count(*) = 1) t